OCaml HTML5 parser/serialiser based on Python's JustHTML

Compare changes

Choose any two refs to compare.

Changed files
+1965 -656
bin
lib
check
html5rw
parser
js
+71 -3
README.md
··· 1 - # html5rw - Pure OCaml HTML5 Parser 1 + # html5rw - Pure OCaml HTML5 Parser and Conformance Checker 2 2 3 - A pure OCaml HTML5 parser implementing the WHATWG HTML5 parsing specification. 3 + A pure OCaml HTML5 parser and validator implementing the WHATWG HTML5 specification. 4 4 This library passes the html5lib-tests suite and provides full support for 5 - tokenization, tree construction, encoding detection, and CSS selector queries. 5 + tokenization, tree construction, encoding detection, CSS selector queries, and 6 + conformance checking. 6 7 This library was ported from [JustHTML](https://github.com/EmilStenstrom/justhtml/). 7 8 8 9 ## Key Features 9 10 10 11 - **WHATWG Compliant**: Implements the full HTML5 parsing algorithm with proper error recovery 12 + - **Conformance Checker**: Validates HTML5 documents against the WHATWG specification 11 13 - **CSS Selectors**: Query the DOM using standard CSS selector syntax 12 14 - **Streaming I/O**: Uses bytesrw for efficient streaming input/output 13 15 - **Encoding Detection**: Automatic character encoding detection following the WHATWG algorithm 14 16 - **Entity Decoding**: Complete HTML5 named character reference support 17 + - **Multiple Output Formats**: Text, JSON (Nu validator compatible), and GNU-style output 18 + 19 + ## Libraries 20 + 21 + - `html5rw` - Core HTML5 parser 22 + - `html5rw.check` - Conformance checker library 23 + 24 + ## Command Line Tool 25 + 26 + The `html5check` CLI validates HTML5 documents: 27 + 28 + ```bash 29 + # Validate a file 30 + html5check index.html 31 + 32 + # Validate from stdin 33 + cat page.html | html5check - 34 + 35 + # JSON output (Nu validator compatible) 36 + html5check --format=json page.html 37 + 38 + # GNU-style output for IDE integration 39 + html5check --format=gnu page.html 40 + 41 + # Show only errors (suppress warnings) 42 + html5check --errors-only page.html 43 + 44 + # Quiet mode - show only counts 45 + html5check --quiet page.html 46 + ``` 47 + 48 + Exit codes: 0 = valid, 1 = validation errors, 2 = I/O error. 15 49 16 50 ## Usage 51 + 52 + ### Parsing HTML 17 53 18 54 ```ocaml 19 55 open Bytesrw ··· 41 77 let reader = Bytes.Reader.of_string "<p>Fragment content</p>" 42 78 let doc = Html5rw.parse ~fragment_context:ctx reader 43 79 ``` 80 + 81 + ### Validating HTML 82 + 83 + ```ocaml 84 + open Bytesrw 85 + 86 + (* Check HTML from a string *) 87 + let html = "<html><body><p>Hello</p></body></html>" 88 + let reader = Bytes.Reader.of_string html 89 + let result = Htmlrw_check.check reader 90 + 91 + (* Check for errors *) 92 + if Htmlrw_check.has_errors result then 93 + print_endline "Document has errors"; 94 + 95 + (* Get all messages *) 96 + let messages = Htmlrw_check.messages result in 97 + List.iter (fun msg -> 98 + Format.printf "%a@." Htmlrw_check.pp_message msg 99 + ) messages; 100 + 101 + (* Get formatted output *) 102 + let text_output = Htmlrw_check.to_text result in 103 + let json_output = Htmlrw_check.to_json result in 104 + let gnu_output = Htmlrw_check.to_gnu result 105 + ``` 106 + 107 + The checker validates: 108 + - Parse errors (malformed HTML syntax) 109 + - Content model violations (invalid element nesting) 110 + - Attribute errors (invalid or missing required attributes) 111 + - Structural issues (other conformance problems) 44 112 45 113 ## Installation 46 114
+5
bin/dune
··· 1 + (executable 2 + (name html5check) 3 + (public_name html5check) 4 + (package html5rw) 5 + (libraries htmlrw_check html5rw bytesrw cmdliner))
-4
bin/html5check/dune
··· 1 - (executable 2 - (name html5check) 3 - (public_name html5check) 4 - (libraries htmlrw_check html5rw bytesrw cmdliner))
-167
bin/html5check/html5check.ml
··· 1 - (*--------------------------------------------------------------------------- 2 - Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 - SPDX-License-Identifier: MIT 4 - ---------------------------------------------------------------------------*) 5 - 6 - (** html5check - HTML5 conformance checker CLI 7 - 8 - Command line interface for validating HTML5 documents. *) 9 - 10 - open Cmdliner 11 - 12 - let version = "0.1.0" 13 - 14 - (** Exit codes *) 15 - module Exit_code = struct 16 - let ok = Cmd.Exit.ok 17 - let validation_errors = 1 18 - let io_error = 2 19 - end 20 - 21 - (** Read input from file or stdin *) 22 - let read_input file = 23 - try 24 - let ic = 25 - if file = "-" then stdin 26 - else open_in file 27 - in 28 - let reader = Bytesrw.Bytes.Reader.of_in_channel ic in 29 - Ok (reader, ic, file) 30 - with 31 - | Sys_error msg -> 32 - Error (`Io_error (Printf.sprintf "Cannot read file '%s': %s" file msg)) 33 - 34 - (** Format output based on the requested format *) 35 - let format_output format result = 36 - match format with 37 - | `Text -> Htmlrw_check.to_text result 38 - | `Json -> Htmlrw_check.to_json result 39 - | `Gnu -> Htmlrw_check.to_gnu result 40 - 41 - (** Run the validation *) 42 - let run format errors_only exit_zero quiet verbose file = 43 - match read_input file with 44 - | Error (`Io_error msg) -> 45 - if not quiet then Printf.eprintf "Error: %s\n" msg; 46 - Exit_code.io_error 47 - | Ok (reader, ic, system_id) -> 48 - (* Run validation *) 49 - let result = Htmlrw_check.check ~system_id reader in 50 - (* Close input if it's not stdin *) 51 - if file <> "-" then close_in ic; 52 - 53 - (* Get messages based on filtering *) 54 - let messages = 55 - if errors_only then Htmlrw_check.errors result 56 - else Htmlrw_check.messages result 57 - in 58 - 59 - (* Output based on mode *) 60 - if quiet then begin 61 - (* Only show counts *) 62 - let error_count = List.length (Htmlrw_check.errors result) in 63 - let warning_count = List.length (Htmlrw_check.warnings result) in 64 - if errors_only then 65 - Printf.printf "%d error%s\n" error_count (if error_count = 1 then "" else "s") 66 - else 67 - Printf.printf "%d error%s, %d warning%s\n" 68 - error_count (if error_count = 1 then "" else "s") 69 - warning_count (if warning_count = 1 then "" else "s") 70 - end else begin 71 - (* Format and print messages *) 72 - let output = format_output format result in 73 - if output <> "" then print_string output; 74 - 75 - (* Show summary if verbose *) 76 - if verbose && messages <> [] then begin 77 - let error_count = List.length (Htmlrw_check.errors result) in 78 - let warning_count = List.length (Htmlrw_check.warnings result) in 79 - Printf.eprintf "\nSummary: %d error%s, %d warning%s\n" 80 - error_count (if error_count = 1 then "" else "s") 81 - warning_count (if warning_count = 1 then "" else "s") 82 - end 83 - end; 84 - 85 - (* Determine exit code *) 86 - if exit_zero || not (Htmlrw_check.has_errors result) then 87 - Exit_code.ok 88 - else 89 - Exit_code.validation_errors 90 - 91 - (** Command line argument definitions *) 92 - 93 - let format_arg = 94 - let formats = [("text", `Text); ("json", `Json); ("gnu", `Gnu)] in 95 - let doc = 96 - "Output format. $(docv) must be one of $(b,text) (human-readable, default), \ 97 - $(b,json) (Nu validator compatible JSON), or $(b,gnu) (GNU-style for IDE integration)." 98 - in 99 - Arg.(value & opt (enum formats) `Text & info ["format"] ~docv:"FORMAT" ~doc) 100 - 101 - let errors_only_arg = 102 - let doc = "Only show errors (suppress warnings)." in 103 - Arg.(value & flag & info ["errors-only"] ~doc) 104 - 105 - let exit_zero_arg = 106 - let doc = 107 - "Always exit with status code 0, even if validation errors are found. \ 108 - Useful for CI pipelines where you want to collect validation results \ 109 - but not fail the build." 110 - in 111 - Arg.(value & flag & info ["exit-zero"] ~doc) 112 - 113 - let quiet_arg = 114 - let doc = "Quiet mode - only show error and warning counts, no details." in 115 - Arg.(value & flag & info ["q"; "quiet"] ~doc) 116 - 117 - let verbose_arg = 118 - let doc = "Verbose mode - show additional information including summary." in 119 - Arg.(value & flag & info ["v"; "verbose"] ~doc) 120 - 121 - let file_arg = 122 - let doc = 123 - "HTML file to validate. Use $(b,-) to read from standard input. \ 124 - If no file is specified, reads from stdin." 125 - in 126 - Arg.(value & pos 0 string "-" & info [] ~docv:"FILE" ~doc) 127 - 128 - let cmd = 129 - let doc = "validate HTML5 documents for conformance" in 130 - let man = [ 131 - `S Manpage.s_description; 132 - `P "$(tname) validates HTML5 documents against the WHATWG HTML5 specification. \ 133 - It reports parse errors, structural validation issues, and conformance problems."; 134 - `P "The validator checks for:"; 135 - `I ("Parse errors", "Malformed HTML syntax according to the WHATWG specification"); 136 - `I ("Content model violations", "Elements in invalid parent/child relationships"); 137 - `I ("Attribute errors", "Invalid or missing required attributes"); 138 - `I ("Structural issues", "Other conformance problems"); 139 - `S Manpage.s_options; 140 - `S "OUTPUT FORMATS"; 141 - `P "The validator supports three output formats:"; 142 - `I ("$(b,text)", "Human-readable format showing file:line:col: severity: message"); 143 - `I ("$(b,json)", "JSON format compatible with the Nu Html Checker (v.Nu)"); 144 - `I ("$(b,gnu)", "GNU-style format for IDE integration (file:line:column: message)"); 145 - `S "EXIT STATUS"; 146 - `P "The validator exits with one of the following status codes:"; 147 - `I ("0", "No validation errors found (or --exit-zero was specified)"); 148 - `I ("1", "Validation errors were found"); 149 - `I ("2", "File not found or I/O error"); 150 - `S Manpage.s_examples; 151 - `P "Validate a file:"; 152 - `Pre " $(mname) index.html"; 153 - `P "Validate from stdin:"; 154 - `Pre " cat page.html | $(mname) -"; 155 - `P "Show only errors in JSON format:"; 156 - `Pre " $(mname) --format=json --errors-only page.html"; 157 - `P "Quiet mode for CI:"; 158 - `Pre " $(mname) --quiet --exit-zero index.html"; 159 - `S Manpage.s_bugs; 160 - `P "Report bugs at https://tangled.org/@anil.recoil.org/ocaml-html5rw/issues"; 161 - ] in 162 - let info = Cmd.info "html5check" ~version ~doc ~man in 163 - Cmd.v info Term.(const run $ format_arg $ errors_only_arg $ exit_zero_arg 164 - $ quiet_arg $ verbose_arg $ file_arg) 165 - 166 - let main () = Cmd.eval' cmd 167 - let () = Stdlib.exit (main ())
+167
bin/html5check.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: MIT 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** html5check - HTML5 conformance checker CLI 7 + 8 + Command line interface for validating HTML5 documents. *) 9 + 10 + open Cmdliner 11 + 12 + let version = "0.1.0" 13 + 14 + (** Exit codes *) 15 + module Exit_code = struct 16 + let ok = Cmd.Exit.ok 17 + let validation_errors = 1 18 + let io_error = 2 19 + end 20 + 21 + (** Read input from file or stdin *) 22 + let read_input file = 23 + try 24 + let ic = 25 + if file = "-" then stdin 26 + else open_in file 27 + in 28 + let reader = Bytesrw.Bytes.Reader.of_in_channel ic in 29 + Ok (reader, ic, file) 30 + with 31 + | Sys_error msg -> 32 + Error (`Io_error (Printf.sprintf "Cannot read file '%s': %s" file msg)) 33 + 34 + (** Format output based on the requested format *) 35 + let format_output format result = 36 + match format with 37 + | `Text -> Htmlrw_check.to_text result 38 + | `Json -> Htmlrw_check.to_json result 39 + | `Gnu -> Htmlrw_check.to_gnu result 40 + 41 + (** Run the validation *) 42 + let run format errors_only exit_zero quiet verbose file = 43 + match read_input file with 44 + | Error (`Io_error msg) -> 45 + if not quiet then Printf.eprintf "Error: %s\n" msg; 46 + Exit_code.io_error 47 + | Ok (reader, ic, system_id) -> 48 + (* Run validation *) 49 + let result = Htmlrw_check.check ~system_id reader in 50 + (* Close input if it's not stdin *) 51 + if file <> "-" then close_in ic; 52 + 53 + (* Get messages based on filtering *) 54 + let messages = 55 + if errors_only then Htmlrw_check.errors result 56 + else Htmlrw_check.messages result 57 + in 58 + 59 + (* Output based on mode *) 60 + if quiet then begin 61 + (* Only show counts *) 62 + let error_count = List.length (Htmlrw_check.errors result) in 63 + let warning_count = List.length (Htmlrw_check.warnings result) in 64 + if errors_only then 65 + Printf.printf "%d error%s\n" error_count (if error_count = 1 then "" else "s") 66 + else 67 + Printf.printf "%d error%s, %d warning%s\n" 68 + error_count (if error_count = 1 then "" else "s") 69 + warning_count (if warning_count = 1 then "" else "s") 70 + end else begin 71 + (* Format and print messages *) 72 + let output = format_output format result in 73 + if output <> "" then print_string output; 74 + 75 + (* Show summary if verbose *) 76 + if verbose && messages <> [] then begin 77 + let error_count = List.length (Htmlrw_check.errors result) in 78 + let warning_count = List.length (Htmlrw_check.warnings result) in 79 + Printf.eprintf "\nSummary: %d error%s, %d warning%s\n" 80 + error_count (if error_count = 1 then "" else "s") 81 + warning_count (if warning_count = 1 then "" else "s") 82 + end 83 + end; 84 + 85 + (* Determine exit code *) 86 + if exit_zero || not (Htmlrw_check.has_errors result) then 87 + Exit_code.ok 88 + else 89 + Exit_code.validation_errors 90 + 91 + (** Command line argument definitions *) 92 + 93 + let format_arg = 94 + let formats = [("text", `Text); ("json", `Json); ("gnu", `Gnu)] in 95 + let doc = 96 + "Output format. $(docv) must be one of $(b,text) (human-readable, default), \ 97 + $(b,json) (Nu validator compatible JSON), or $(b,gnu) (GNU-style for IDE integration)." 98 + in 99 + Arg.(value & opt (enum formats) `Text & info ["format"] ~docv:"FORMAT" ~doc) 100 + 101 + let errors_only_arg = 102 + let doc = "Only show errors (suppress warnings)." in 103 + Arg.(value & flag & info ["errors-only"] ~doc) 104 + 105 + let exit_zero_arg = 106 + let doc = 107 + "Always exit with status code 0, even if validation errors are found. \ 108 + Useful for CI pipelines where you want to collect validation results \ 109 + but not fail the build." 110 + in 111 + Arg.(value & flag & info ["exit-zero"] ~doc) 112 + 113 + let quiet_arg = 114 + let doc = "Quiet mode - only show error and warning counts, no details." in 115 + Arg.(value & flag & info ["q"; "quiet"] ~doc) 116 + 117 + let verbose_arg = 118 + let doc = "Verbose mode - show additional information including summary." in 119 + Arg.(value & flag & info ["v"; "verbose"] ~doc) 120 + 121 + let file_arg = 122 + let doc = 123 + "HTML file to validate. Use $(b,-) to read from standard input. \ 124 + If no file is specified, reads from stdin." 125 + in 126 + Arg.(value & pos 0 string "-" & info [] ~docv:"FILE" ~doc) 127 + 128 + let cmd = 129 + let doc = "validate HTML5 documents for conformance" in 130 + let man = [ 131 + `S Manpage.s_description; 132 + `P "$(tname) validates HTML5 documents against the WHATWG HTML5 specification. \ 133 + It reports parse errors, structural validation issues, and conformance problems."; 134 + `P "The validator checks for:"; 135 + `I ("Parse errors", "Malformed HTML syntax according to the WHATWG specification"); 136 + `I ("Content model violations", "Elements in invalid parent/child relationships"); 137 + `I ("Attribute errors", "Invalid or missing required attributes"); 138 + `I ("Structural issues", "Other conformance problems"); 139 + `S Manpage.s_options; 140 + `S "OUTPUT FORMATS"; 141 + `P "The validator supports three output formats:"; 142 + `I ("$(b,text)", "Human-readable format showing file:line:col: severity: message"); 143 + `I ("$(b,json)", "JSON format compatible with the Nu Html Checker (v.Nu)"); 144 + `I ("$(b,gnu)", "GNU-style format for IDE integration (file:line:column: message)"); 145 + `S "EXIT STATUS"; 146 + `P "The validator exits with one of the following status codes:"; 147 + `I ("0", "No validation errors found (or --exit-zero was specified)"); 148 + `I ("1", "Validation errors were found"); 149 + `I ("2", "File not found or I/O error"); 150 + `S Manpage.s_examples; 151 + `P "Validate a file:"; 152 + `Pre " $(mname) index.html"; 153 + `P "Validate from stdin:"; 154 + `Pre " cat page.html | $(mname) -"; 155 + `P "Show only errors in JSON format:"; 156 + `Pre " $(mname) --format=json --errors-only page.html"; 157 + `P "Quiet mode for CI:"; 158 + `Pre " $(mname) --quiet --exit-zero index.html"; 159 + `S Manpage.s_bugs; 160 + `P "Report bugs at https://tangled.org/@anil.recoil.org/ocaml-html5rw/issues"; 161 + ] in 162 + let info = Cmd.info "html5check" ~version ~doc ~man in 163 + Cmd.v info Term.(const run $ format_arg $ errors_only_arg $ exit_zero_arg 164 + $ quiet_arg $ verbose_arg $ file_arg) 165 + 166 + let main () = Cmd.eval' cmd 167 + let () = Stdlib.exit (main ())
+15
dune-project
··· 30 30 (odoc :with-doc) 31 31 (jsont (>= 0.2.0)) 32 32 (cmdliner (>= 1.3.0)))) 33 + 34 + (package 35 + (name html5rw-js) 36 + (synopsis "Browser-based HTML5 parser via js_of_ocaml/wasm_of_ocaml") 37 + (description 38 + "JavaScript and WebAssembly builds of the html5rw HTML5 parser for browser use. \ 39 + Includes a main validator library, web worker for background validation, and \ 40 + browser-based test runner.") 41 + (depends 42 + (ocaml (>= 5.1.0)) 43 + (html5rw (= :version)) 44 + (js_of_ocaml (>= 5.0)) 45 + (js_of_ocaml-ppx (>= 5.0)) 46 + (wasm_of_ocaml-compiler (>= 5.0)) 47 + (brr (>= 0.0.6))))
+35
html5rw-js.opam
··· 1 + # This file is generated by dune, edit dune-project instead 2 + opam-version: "2.0" 3 + synopsis: "Browser-based HTML5 parser via js_of_ocaml/wasm_of_ocaml" 4 + description: 5 + "JavaScript and WebAssembly builds of the html5rw HTML5 parser for browser use. Includes a main validator library, web worker for background validation, and browser-based test runner." 6 + maintainer: ["Anil Madhavapeddy <anil@recoil.org>"] 7 + authors: ["Anil Madhavapeddy <anil@recoil.org>"] 8 + license: "MIT" 9 + homepage: "https://tangled.org/@anil.recoil.org/ocaml-html5rw" 10 + bug-reports: "https://tangled.org/@anil.recoil.org/ocaml-html5rw/issues" 11 + depends: [ 12 + "dune" {>= "3.20"} 13 + "ocaml" {>= "5.1.0"} 14 + "html5rw" {= version} 15 + "js_of_ocaml" {>= "5.0"} 16 + "js_of_ocaml-ppx" {>= "5.0"} 17 + "wasm_of_ocaml-compiler" {>= "5.0"} 18 + "brr" {>= "0.0.6"} 19 + "odoc" {with-doc} 20 + ] 21 + build: [ 22 + ["dune" "subst"] {dev} 23 + [ 24 + "dune" 25 + "build" 26 + "-p" 27 + name 28 + "-j" 29 + jobs 30 + "@install" 31 + "@runtest" {with-test} 32 + "@doc" {with-doc} 33 + ] 34 + ] 35 + x-maintenance-intent: ["(latest)"]
+2 -2
lib/check/attr_utils.ml
··· 3 3 type attrs = (string * string) list 4 4 5 5 let has_attr name attrs = 6 - List.exists (fun (n, _) -> String.lowercase_ascii n = name) attrs 6 + List.exists (fun (n, _) -> Astring.String.Ascii.lowercase n = name) attrs 7 7 8 8 let get_attr name attrs = 9 9 List.find_map (fun (n, v) -> 10 - if String.lowercase_ascii n = name then Some v else None 10 + if Astring.String.Ascii.lowercase n = name then Some v else None 11 11 ) attrs 12 12 13 13 let get_attr_or name ~default attrs =
+2 -1
lib/check/checker_registry.ml
··· 22 22 Hashtbl.replace reg "source" Source_checker.checker; 23 23 Hashtbl.replace reg "label" Label_checker.checker; 24 24 Hashtbl.replace reg "ruby" Ruby_checker.checker; 25 - Hashtbl.replace reg "h1" H1_checker.checker; 25 + Hashtbl.replace reg "heading" Heading_checker.checker; 26 26 Hashtbl.replace reg "srcset-sizes" Srcset_sizes_checker.checker; 27 27 Hashtbl.replace reg "autofocus" Autofocus_checker.checker; 28 28 Hashtbl.replace reg "option" Option_checker.checker; ··· 36 36 Hashtbl.replace reg "xhtml-content" Xhtml_content_checker.checker; 37 37 Hashtbl.replace reg "lang-detecting" Lang_detecting_checker.checker; 38 38 Hashtbl.replace reg "unknown-element" Unknown_element_checker.checker; 39 + Hashtbl.replace reg "content" Content_checker.checker; 39 40 reg 40 41 41 42 let register registry name checker = Hashtbl.replace registry name checker
+64 -7
lib/check/content_model/content_checker.ml
··· 30 30 | Some spec -> 31 31 List.exists (fun cat -> Element_spec.has_category spec cat) cats) 32 32 | Content_model.Elements names -> 33 - List.mem (String.lowercase_ascii element_name) 34 - (List.map String.lowercase_ascii names) 33 + List.mem (Astring.String.Ascii.lowercase element_name) 34 + (List.map Astring.String.Ascii.lowercase names) 35 35 | Content_model.Mixed cats -> ( 36 36 match Element_registry.get registry element_name with 37 37 | None -> false ··· 79 79 (`Element (`Not_allowed_as_child (`Child name, `Parent prohibited)))) 80 80 spec.Element_spec.prohibited_ancestors 81 81 82 + (* Check if element is allowed via permitted_parents *) 83 + let is_permitted_parent registry child_name parent_name = 84 + match Element_registry.get registry child_name with 85 + | None -> false 86 + | Some spec -> 87 + match spec.Element_spec.permitted_parents with 88 + | None -> false 89 + | Some parents -> 90 + List.mem (Astring.String.Ascii.lowercase parent_name) 91 + (List.map Astring.String.Ascii.lowercase parents) 92 + 93 + (* Check if a specific element is in the ancestor stack *) 94 + let has_ancestor state ancestor_name = 95 + List.exists (fun ctx -> 96 + String.equal (Astring.String.Ascii.lowercase ctx.name) 97 + (Astring.String.Ascii.lowercase ancestor_name) 98 + ) state.ancestor_stack 99 + 100 + (* Check if an attribute exists in raw attrs list *) 101 + let has_raw_attr name attrs = 102 + List.exists (fun (n, _) -> 103 + Astring.String.Ascii.lowercase n = Astring.String.Ascii.lowercase name 104 + ) attrs 105 + 106 + (* Special cases for content model validation: 107 + - dt/dd inside div is only valid when dl is an ancestor (div as grouping in dl) 108 + - meta with property/itemprop/name attribute in body is valid (RDFa/microdata) 109 + - link with itemprop in body is valid (microdata) *) 110 + let is_special_case_allowed state child_name parent_name raw_attrs = 111 + let child_lower = Astring.String.Ascii.lowercase child_name in 112 + let parent_lower = Astring.String.Ascii.lowercase parent_name in 113 + (* dt/dd inside div is allowed when dl is an ancestor *) 114 + if (child_lower = "dt" || child_lower = "dd") && parent_lower = "div" then 115 + has_ancestor state "dl" 116 + (* meta in body is allowed with property (RDFa), itemprop (microdata), or name+content (meta tags) *) 117 + else if child_lower = "meta" && parent_lower <> "head" then 118 + has_raw_attr "property" raw_attrs || 119 + has_raw_attr "itemprop" raw_attrs || 120 + (has_raw_attr "name" raw_attrs && has_raw_attr "content" raw_attrs) 121 + (* link in body is allowed with itemprop (microdata) or property (RDFa) *) 122 + else if child_lower = "link" && parent_lower <> "head" then 123 + has_raw_attr "itemprop" raw_attrs || has_raw_attr "property" raw_attrs 124 + (* Custom elements (with hyphen) are valid HTML5 and are flow content *) 125 + else if String.contains child_lower '-' then 126 + true 127 + else 128 + false 129 + 82 130 (* Validate that a child element is allowed *) 83 - let validate_child_element state child_name collector = 131 + let validate_child_element state child_name raw_attrs collector = 84 132 match state.ancestor_stack with 85 133 | [] -> 86 134 (* Root level - only html allowed *) 87 - if not (String.equal (String.lowercase_ascii child_name) "html") then 135 + if not (String.equal (Astring.String.Ascii.lowercase child_name) "html") then 88 136 Message_collector.add_typed collector 89 137 (`Generic (Printf.sprintf "Element '%s' not allowed at document root (only 'html' allowed)" child_name)) 90 138 | parent :: _ -> 91 139 let content_model = parent.spec.Element_spec.content_model in 92 - if not (matches_content_model state.registry child_name content_model) then 140 + (* Check content model, permitted_parents, or special cases *) 141 + let allowed_by_content_model = matches_content_model state.registry child_name content_model in 142 + let allowed_by_permitted_parents = is_permitted_parent state.registry child_name parent.name in 143 + let allowed_by_special_case = is_special_case_allowed state child_name parent.name raw_attrs in 144 + if not (allowed_by_content_model || allowed_by_permitted_parents || allowed_by_special_case) then 93 145 Message_collector.add_typed collector 94 146 (`Element (`Not_allowed_as_child (`Child child_name, `Parent parent.name))) 95 147 96 148 let start_element state ~element collector = 97 149 let name = Tag.tag_to_string element.Element.tag in 150 + let raw_attrs = element.Element.raw_attrs in 98 151 99 152 (* Check if we're inside a foreign (SVG/MathML) context *) 100 153 let in_foreign_context = match state.ancestor_stack with ··· 127 180 match spec_opt with 128 181 | None -> 129 182 (* Unknown element - first check if it's allowed in current context *) 130 - validate_child_element state name collector 183 + validate_child_element state name raw_attrs collector; 184 + (* Push unknown element onto stack with default flow content model *) 185 + let spec = Element_spec.make ~name ~content_model:(Content_model.Categories [Content_category.Flow]) () in 186 + let context = { name; spec; children_count = 0; is_foreign = false } in 187 + state.ancestor_stack <- context :: state.ancestor_stack 131 188 | Some spec -> 132 189 (* Check prohibited ancestors *) 133 190 check_prohibited_ancestors state name spec collector; 134 191 135 192 (* Validate this element is allowed as child of parent *) 136 - validate_child_element state name collector; 193 + validate_child_element state name raw_attrs collector; 137 194 138 195 (* Push element context onto stack *) 139 196 let context = { name; spec; children_count = 0; is_foreign = false } in
+2 -2
lib/check/content_model/element_registry.ml
··· 3 3 let create () = Hashtbl.create 128 4 4 5 5 let register registry spec = 6 - let name = String.lowercase_ascii spec.Element_spec.name in 6 + let name = Astring.String.Ascii.lowercase spec.Element_spec.name in 7 7 Hashtbl.replace registry name spec 8 8 9 9 let get registry name = 10 - let name = String.lowercase_ascii name in 10 + let name = Astring.String.Ascii.lowercase name in 11 11 Hashtbl.find_opt registry name 12 12 13 13 let list_names registry =
+3 -1
lib/check/content_model/elements_embedded.ml
··· 31 31 () 32 32 33 33 let img = 34 + (* Note: img is only Interactive when it has usemap attribute; 35 + we omit Interactive from static categories since usemap is rare *) 34 36 Element_spec.make ~name:"img" ~void:true 35 - ~categories:[ Flow; Phrasing; Embedded; Palpable; Interactive ] 37 + ~categories:[ Flow; Phrasing; Embedded; Palpable ] 36 38 ~content_model:Nothing 37 39 ~attrs: 38 40 [
+1 -1
lib/check/content_model/elements_form.ml
··· 97 97 let select = 98 98 Element_spec.make ~name:"select" 99 99 ~categories:[Flow; Phrasing; Interactive; Palpable] 100 - ~content_model:(Elements ["option"; "optgroup"; "script"; "template"]) 100 + ~content_model:(Elements ["option"; "optgroup"; "hr"; "script"; "template"]) 101 101 ~attrs:[ 102 102 Attr_spec.make "autocomplete" ~datatype:"autocomplete" (); 103 103 Attr_spec.make "disabled" ~datatype:"boolean" ();
-1
lib/check/content_model/elements_table.ml
··· 34 34 ~categories:[] 35 35 ~content_model:(Categories [ Flow ]) 36 36 ~permitted_parents:[ "table" ] 37 - ~prohibited_ancestors:[ "table" ] 38 37 ~attrs:[] () 39 38 40 39 let colgroup =
+28 -6
lib/check/datatype/datatype.ml
··· 12 12 13 13 (* Helper utilities *) 14 14 15 + (** Character predicates *) 16 + 15 17 let is_whitespace = function 16 18 | ' ' | '\t' | '\n' | '\r' | '\012' (* FF *) -> true 17 19 | _ -> false 18 20 19 21 let is_ascii_digit = function '0' .. '9' -> true | _ -> false 20 22 21 - let to_ascii_lowercase c = 22 - match c with 'A' .. 'Z' -> Char.chr (Char.code c + 32) | _ -> c 23 + let is_lower_alpha = function 'a' .. 'z' -> true | _ -> false 24 + 25 + let is_upper_alpha = function 'A' .. 'Z' -> true | _ -> false 26 + 27 + let is_alpha c = is_lower_alpha c || is_upper_alpha c 28 + 29 + let is_alphanumeric c = is_alpha c || is_ascii_digit c 23 30 24 - let string_to_ascii_lowercase s = 25 - String.map to_ascii_lowercase s 31 + let is_hex_digit = function 32 + | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true 33 + | _ -> false 34 + 35 + (** Case conversion - delegated to Astring *) 36 + 37 + (* Removed to_ascii_lowercase and string_to_ascii_lowercase - use Astring.String.Ascii.lowercase instead *) 38 + 39 + (** String predicates *) 40 + 41 + let is_non_empty s = String.trim s <> "" 42 + 43 + let is_all_digits s = String.length s > 0 && String.for_all is_ascii_digit s 44 + 45 + let is_all_alpha s = String.length s > 0 && String.for_all is_alpha s 46 + 47 + let is_all_alphanumeric s = String.length s > 0 && String.for_all is_alphanumeric s 26 48 27 49 let trim_html_spaces s = 28 50 let len = String.length s in ··· 52 74 let make_enum ~name ~values ?(allow_empty = true) () : t = 53 75 (* Pre-compute hashtable for O(1) membership *) 54 76 let values_tbl = Hashtbl.create (List.length values) in 55 - List.iter (fun v -> Hashtbl.add values_tbl (String.lowercase_ascii v) ()) values; 77 + List.iter (fun v -> Hashtbl.add values_tbl (Astring.String.Ascii.lowercase v) ()) values; 56 78 let values_str = String.concat ", " (List.map (Printf.sprintf "'%s'") values) in 57 79 (module struct 58 80 let name = name 59 81 let validate s = 60 - let s_lower = string_to_ascii_lowercase s in 82 + let s_lower = Astring.String.Ascii.lowercase s in 61 83 if (allow_empty && s = "") || Hashtbl.mem values_tbl s_lower then Ok () 62 84 else Error (Printf.sprintf "The value '%s' is not a valid %s value. Expected %s%s." 63 85 s name (if allow_empty then "empty string, " else "") values_str)
+36 -5
lib/check/datatype/datatype.mli
··· 27 27 (** Check if a value is valid *) 28 28 val is_valid : t -> string -> bool 29 29 30 - (** Helper utilities for implementing datatype validators. *) 30 + (** {1 Helper utilities for implementing datatype validators} *) 31 + 32 + (** {2 Character predicates} *) 31 33 32 34 (** Check if a character is HTML5 whitespace (space, tab, LF, FF, or CR). *) 33 35 val is_whitespace : char -> bool ··· 35 37 (** Check if a character is an ASCII digit (0-9). *) 36 38 val is_ascii_digit : char -> bool 37 39 38 - (** Convert an ASCII character to lowercase. *) 39 - val to_ascii_lowercase : char -> char 40 + (** Check if a character is a lowercase ASCII letter (a-z). *) 41 + val is_lower_alpha : char -> bool 42 + 43 + (** Check if a character is an uppercase ASCII letter (A-Z). *) 44 + val is_upper_alpha : char -> bool 45 + 46 + (** Check if a character is an ASCII letter (a-z or A-Z). *) 47 + val is_alpha : char -> bool 48 + 49 + (** Check if a character is an ASCII letter or digit. *) 50 + val is_alphanumeric : char -> bool 51 + 52 + (** Check if a character is a hexadecimal digit (0-9, a-f, A-F). *) 53 + val is_hex_digit : char -> bool 54 + 55 + (** {2 Case conversion} *) 56 + 57 + (** Case conversion functions removed - use Astring.String.Ascii.lowercase instead *) 40 58 41 - (** Convert an ASCII string to lowercase. *) 42 - val string_to_ascii_lowercase : string -> string 59 + (** {2 String predicates} *) 60 + 61 + (** Check if a string has non-whitespace content after trimming. *) 62 + val is_non_empty : string -> bool 63 + 64 + (** Check if all characters in a non-empty string are ASCII digits. *) 65 + val is_all_digits : string -> bool 66 + 67 + (** Check if all characters in a non-empty string are ASCII letters. *) 68 + val is_all_alpha : string -> bool 69 + 70 + (** Check if all characters in a non-empty string are ASCII letters or digits. *) 71 + val is_all_alphanumeric : string -> bool 72 + 73 + (** {2 String manipulation} *) 43 74 44 75 (** Trim HTML5 whitespace from both ends of a string. *) 45 76 val trim_html_spaces : string -> string
+8 -9
lib/check/datatype/dt_autocomplete.ml
··· 2 2 3 3 (* Use shared utilities from Datatype *) 4 4 let is_whitespace = Datatype.is_whitespace 5 - let to_ascii_lowercase = Datatype.to_ascii_lowercase 5 + let to_ascii_lowercase c = 6 + match c with 'A' .. 'Z' -> Char.chr (Char.code c + 32) | _ -> c 7 + 8 + (* Use Astring for string operations *) 9 + let is_prefix = Astring.String.is_prefix 6 10 7 11 (** Trim whitespace from string and collapse internal whitespace *) 8 12 let trim_whitespace s = ··· 104 108 (** Split string on whitespace - uses shared utility *) 105 109 let split_on_whitespace = Datatype.split_on_whitespace 106 110 107 - (** Check if string starts with prefix *) 108 - let starts_with s prefix = 109 - String.length s >= String.length prefix 110 - && String.sub s 0 (String.length prefix) = prefix 111 - 112 111 (** Validate detail tokens *) 113 112 let check_tokens tokens = 114 113 let tokens = ref tokens in ··· 116 115 117 116 (* Check for section-* *) 118 117 (match !tokens with 119 - | token :: rest when starts_with token "section-" -> 118 + | token :: rest when is_prefix ~affix:"section-" token -> 120 119 tokens := rest 121 120 | _ -> ()); 122 121 ··· 145 144 146 145 (* Check if any token in the list is a section-* indicator *) 147 146 let find_section tokens = 148 - List.find_opt (fun t -> starts_with t "section-") tokens 147 + List.find_opt (fun t -> is_prefix ~affix:"section-" t) tokens 149 148 in 150 149 151 150 (* Check if webauthn appears anywhere except as the very last token *) ··· 207 206 (Printf.sprintf 208 207 "The token \"%s\" must only appear before any autofill field names." 209 208 token) 210 - | token :: _ when starts_with token "section-" -> 209 + | token :: _ when is_prefix ~affix:"section-" token -> 211 210 Error 212 211 "A \"section-*\" indicator must only appear as the first token in a \ 213 212 list of autofill detail tokens."
+2 -2
lib/check/datatype/dt_boolean.ml
··· 22 22 match s with 23 23 | "" | "true" | "false" -> Ok () 24 24 | _ -> 25 - let s_lower = Datatype.string_to_ascii_lowercase s in 26 - let attr_lower = Datatype.string_to_ascii_lowercase attr_name in 25 + let s_lower = Astring.String.Ascii.lowercase s in 26 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 27 27 if s_lower = attr_lower then Ok () 28 28 else 29 29 Error
+1 -1
lib/check/datatype/dt_button_type.ml
··· 7 7 let name = "button-type" 8 8 9 9 let validate s = 10 - let s_lower = Datatype.string_to_ascii_lowercase s in 10 + let s_lower = Astring.String.Ascii.lowercase s in 11 11 if List.mem s_lower valid_types then Ok () 12 12 else 13 13 Error
+3 -6
lib/check/datatype/dt_charset.ml
··· 1 1 (** Helper functions for charset validation *) 2 2 3 3 let is_valid_charset_char c = 4 - (c >= '0' && c <= '9') || 5 - (c >= 'a' && c <= 'z') || 6 - (c >= 'A' && c <= 'Z') || 4 + Datatype.is_alphanumeric c || 7 5 c = '-' || c = '!' || c = '#' || c = '$' || c = '%' || c = '&' || 8 6 c = '\'' || c = '+' || c = '_' || c = '`' || c = '{' || c = '}' || 9 7 c = '~' || c = '^' 10 8 11 - let to_lower s = String.lowercase_ascii s 9 + let to_lower = Astring.String.Ascii.lowercase 12 10 13 11 (** Common encoding labels recognized by WHATWG Encoding Standard. 14 12 This is a subset of the full list. *) ··· 74 72 module Meta_charset = struct 75 73 let name = "legacy character encoding declaration" 76 74 77 - let is_whitespace c = 78 - c = ' ' || c = '\t' || c = '\n' || c = '\012' || c = '\r' 75 + let is_whitespace = Datatype.is_whitespace 79 76 80 77 let validate s = 81 78 let lower = to_lower s in
+2 -3
lib/check/datatype/dt_color.ml
··· 154 154 ] 155 155 156 156 (** Check if character is hex digit *) 157 - let is_hex_digit c = 158 - (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') 157 + let is_hex_digit = Datatype.is_hex_digit 159 158 160 159 (** Validate hex color (#RGB or #RRGGBB) *) 161 160 let validate_hex_color s = ··· 209 208 let name = "color" 210 209 211 210 let validate s = 212 - let s = String.trim s |> String.lowercase_ascii in 211 + let s = String.trim s |> Astring.String.Ascii.lowercase in 213 212 if String.length s = 0 then Error "Color value must not be empty" 214 213 else if List.mem s named_colors then Ok () 215 214 else if String.length s > 0 && s.[0] = '#' then validate_hex_color s
+1 -1
lib/check/datatype/dt_contenteditable.ml
··· 4 4 let name = "contenteditable" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "true" | "false" | "plaintext-only" -> Ok () 10 10 | _ ->
+1 -1
lib/check/datatype/dt_crossorigin.ml
··· 4 4 let name = "crossorigin" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "anonymous" | "use-credentials" -> Ok () 10 10 | _ ->
+1 -4
lib/check/datatype/dt_datetime.ml
··· 1 1 (** Helper functions for datetime validation *) 2 2 3 - let is_digit c = c >= '0' && c <= '9' 4 - 5 - let is_all_digits s = 6 - String.for_all is_digit s 3 + let is_all_digits = Datatype.is_all_digits 7 4 8 5 let parse_int s = 9 6 try Some (int_of_string s)
+1 -1
lib/check/datatype/dt_decoding.ml
··· 4 4 let name = "decoding" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "sync" | "async" | "auto" -> Ok () 10 10 | _ ->
+1 -1
lib/check/datatype/dt_dir.ml
··· 4 4 let name = "dir" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "ltr" | "rtl" | "auto" -> Ok () 10 10 | _ ->
+1 -1
lib/check/datatype/dt_draggable.ml
··· 4 4 let name = "draggable" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "true" | "false" -> Ok () 10 10 | _ ->
+1 -3
lib/check/datatype/dt_email.ml
··· 2 2 3 3 (** Helper to check if a character is valid in email local/domain parts *) 4 4 let is_email_char c = 5 - (c >= 'a' && c <= 'z') 6 - || (c >= 'A' && c <= 'Z') 7 - || (c >= '0' && c <= '9') 5 + Datatype.is_alphanumeric c 8 6 || c = '.' || c = '-' || c = '_' || c = '+' || c = '=' 9 7 10 8 (** Validate a single email address using simplified rules *)
+3 -3
lib/check/datatype/dt_email.mli
··· 6 6 (** Valid email address validator. 7 7 8 8 Validates a single email address. Uses simplified validation rules: 9 - - Must contain exactly one '@' character 10 - - Local part (before @) must be non-empty 11 - - Domain part (after @) must be non-empty and contain at least one '.' 9 + - Must contain exactly one ['@'] character 10 + - Local part (before ['@']) must be non-empty 11 + - Domain part (after ['@']) must be non-empty and contain at least one ['.'] 12 12 - Only ASCII characters allowed *) 13 13 module Email : Datatype.S 14 14
+1 -1
lib/check/datatype/dt_enterkeyhint.ml
··· 4 4 let name = "enterkeyhint" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "enter" | "done" | "go" | "next" | "previous" | "search" | "send" -> 10 10 Ok ()
+1 -1
lib/check/datatype/dt_fetchpriority.ml
··· 4 4 let name = "fetchpriority" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "high" | "low" | "auto" -> Ok () 10 10 | _ ->
+1 -1
lib/check/datatype/dt_form_enctype.ml
··· 12 12 let name = "form-enctype" 13 13 14 14 let validate s = 15 - let s_lower = Datatype.string_to_ascii_lowercase s in 15 + let s_lower = Astring.String.Ascii.lowercase s in 16 16 if List.mem s_lower valid_enctypes then Ok () 17 17 else 18 18 Error
+1 -1
lib/check/datatype/dt_form_method.ml
··· 7 7 let name = "form-method" 8 8 9 9 let validate s = 10 - let s_lower = Datatype.string_to_ascii_lowercase s in 10 + let s_lower = Astring.String.Ascii.lowercase s in 11 11 if List.mem s_lower valid_methods then Ok () 12 12 else 13 13 Error
+1 -1
lib/check/datatype/dt_hidden.ml
··· 4 4 let name = "hidden" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "hidden" | "until-found" -> Ok () 10 10 | _ ->
+1 -1
lib/check/datatype/dt_input_type.ml
··· 31 31 let name = "input-type" 32 32 33 33 let validate s = 34 - let s_lower = Datatype.string_to_ascii_lowercase s in 34 + let s_lower = Astring.String.Ascii.lowercase s in 35 35 if List.mem s_lower valid_types then Ok () 36 36 else 37 37 Error
+1 -1
lib/check/datatype/dt_inputmode.ml
··· 4 4 let name = "inputmode" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "none" | "text" | "decimal" | "numeric" | "tel" | "search" 10 10 | "email" | "url" ->
+1 -1
lib/check/datatype/dt_integrity.ml
··· 49 49 "Hash value '%s' must be in format 'algorithm-base64hash'" trimmed) 50 50 | Some dash_pos -> 51 51 let algorithm = String.sub trimmed 0 dash_pos in 52 - let algorithm_lower = Datatype.string_to_ascii_lowercase algorithm in 52 + let algorithm_lower = Astring.String.Ascii.lowercase algorithm in 53 53 if not (List.mem algorithm_lower valid_algorithms) then 54 54 Error 55 55 (Printf.sprintf
+1 -1
lib/check/datatype/dt_kind.ml
··· 4 4 let name = "kind" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "subtitles" | "captions" | "descriptions" | "chapters" | "metadata" -> Ok () 10 10 | _ ->
+4 -17
lib/check/datatype/dt_language.ml
··· 2 2 3 3 let q = Error_code.q 4 4 5 - let is_lower_alpha c = c >= 'a' && c <= 'z' 6 - let is_upper_alpha c = c >= 'A' && c <= 'Z' 7 - let is_alpha c = is_lower_alpha c || is_upper_alpha c 8 - let is_digit c = c >= '0' && c <= '9' 9 - let is_alphanumeric c = is_alpha c || is_digit c 10 - 11 - let is_all_alpha s = 12 - String.for_all is_alpha s 13 - 14 - let _is_all_digits s = 15 - String.for_all is_digit s 16 - 17 - let is_all_alphanumeric s = 18 - String.for_all is_alphanumeric s 19 - 20 - let to_lower s = 21 - String.lowercase_ascii s 5 + (* Use shared character predicates from Datatype *) 6 + let is_all_alpha = Datatype.is_all_alpha 7 + let is_all_alphanumeric = Datatype.is_all_alphanumeric 8 + let to_lower = Astring.String.Ascii.lowercase 22 9 23 10 (** Valid extlang subtags per IANA language-subtag-registry. 24 11 Extlangs are 3-letter subtags that follow the primary language.
+1 -1
lib/check/datatype/dt_list_type.ml
··· 26 26 let name = "ul-type" 27 27 28 28 let validate s = 29 - let s_lower = Datatype.string_to_ascii_lowercase s in 29 + let s_lower = Astring.String.Ascii.lowercase s in 30 30 if List.mem s_lower valid_ul_types then Ok () 31 31 else 32 32 Error
+1 -1
lib/check/datatype/dt_loading.ml
··· 4 4 let name = "loading" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "lazy" | "eager" -> Ok () 10 10 | _ ->
+44 -25
lib/check/datatype/dt_media_query.ml
··· 70 70 (** Media query keywords (unused but kept for documentation) *) 71 71 let _media_keywords = [ "and"; "not"; "only" ] 72 72 73 - (** Check if character is whitespace *) 74 - let is_whitespace c = c = ' ' || c = '\t' || c = '\n' || c = '\r' 73 + let is_whitespace = Datatype.is_whitespace 75 74 76 75 (** Check if character can start an identifier *) 77 76 let is_ident_start c = 78 - (c >= 'a' && c <= 'z') 79 - || (c >= 'A' && c <= 'Z') 80 - || c = '_' || c = '-' || Char.code c >= 128 77 + Datatype.is_alpha c || c = '_' || c = '-' || Char.code c >= 128 81 78 82 79 (** Check if character can be in an identifier *) 83 80 let is_ident_char c = 84 - is_ident_start c || (c >= '0' && c <= '9') 81 + is_ident_start c || Datatype.is_ascii_digit c 82 + 83 + (** Unicode case folding for case-insensitive comparison. 84 + 85 + WORKAROUND: This is a temporary domain-specific implementation because 86 + the uucp library fails to compile with wasm_of_ocaml due to "too many 87 + locals" errors. Once uucp supports WASM, restore the proper implementation: 88 + 89 + {[ 90 + (* Proper uucp-based case folding: *) 91 + let case_fold s = 92 + let buf = Buffer.create (String.length s) in 93 + let add_uchar u = Uutf.Buffer.add_utf_8 buf u in 94 + let fold_char () _pos = function 95 + | `Malformed _ -> () 96 + | `Uchar u -> 97 + match Uucp.Case.Fold.fold u with 98 + | `Self -> add_uchar u 99 + | `Uchars us -> List.iter add_uchar us 100 + in 101 + Uutf.String.fold_utf_8 fold_char () s; 102 + Buffer.contents buf 103 + ]} 85 104 86 - (** Unicode case-fold for Turkish dotted-I etc *) 87 - let lowercase_unicode s = 88 - (* Handle special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE -> i *) 105 + This workaround handles the Turkish dotted-I (U+0130 -> 'i' + U+0307) 106 + which is the main non-ASCII case relevant for CSS media query identifiers. *) 107 + let case_fold s = 89 108 let buf = Buffer.create (String.length s) in 109 + let len = String.length s in 90 110 let i = ref 0 in 91 - while !i < String.length s do 111 + while !i < len do 92 112 let c = s.[!i] in 93 - if c = '\xc4' && !i + 1 < String.length s && s.[!i + 1] = '\xb0' then begin 94 - (* U+0130 -> 'i' + U+0307 (combining dot above), but for simplicity just 'i' followed by U+0307 *) 113 + (* U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE encoded as UTF-8: 0xC4 0xB0 *) 114 + if c = '\xc4' && !i + 1 < len && s.[!i + 1] = '\xb0' then begin 115 + (* Case fold to 'i' + U+0307 (combining dot above) = 0x69 0xCC 0x87 *) 95 116 Buffer.add_string buf "i\xcc\x87"; 96 117 i := !i + 2 97 118 end else begin ··· 151 172 let trimmed = String.trim s in 152 173 if String.length trimmed >= 3 then begin 153 174 let suffix = String.sub trimmed (String.length trimmed - 3) 3 in 154 - if String.lowercase_ascii suffix = "and" then 175 + if Astring.String.Ascii.lowercase suffix = "and" then 155 176 Error "Parse Error." 156 177 else if String.length trimmed >= 4 then begin 157 178 let suffix4 = String.sub trimmed (String.length trimmed - 4) 4 in 158 - if String.lowercase_ascii suffix4 = "and(" then 179 + if Astring.String.Ascii.lowercase suffix4 = "and(" then 159 180 Error "Parse Error." 160 181 else 161 182 validate_media_query_content trimmed ··· 201 222 let has_not = ref false in 202 223 (match read_ident () with 203 224 | Some w -> 204 - let w_lower = String.lowercase_ascii w in 225 + let w_lower = Astring.String.Ascii.lowercase w in 205 226 if w_lower = "only" then (has_only := true; skip_ws ()) 206 227 else if w_lower = "not" then (has_not := true; skip_ws ()) 207 228 else i := !i - String.length w (* put back *) ··· 222 243 match read_ident () with 223 244 | None -> Error "Parse Error." 224 245 | Some media_type -> 225 - let mt_lower = lowercase_unicode media_type in 246 + let mt_lower = case_fold media_type in 226 247 (* Check for deprecated media type *) 227 248 if List.mem mt_lower deprecated_media_types then 228 249 Error (Printf.sprintf "The media \"%s\" has been deprecated" mt_lower) ··· 238 259 match read_ident () with 239 260 | None -> Error "Parse Error." 240 261 | Some kw -> 241 - let kw_lower = String.lowercase_ascii kw in 262 + let kw_lower = Astring.String.Ascii.lowercase kw in 242 263 if kw_lower <> "and" then Error "Parse Error." 243 264 else begin 244 265 (* Check that there was whitespace before 'and' *) ··· 267 288 match read_ident () with 268 289 | None -> Error "Parse Error." 269 290 | Some kw2 -> 270 - let kw2_lower = String.lowercase_ascii kw2 in 291 + let kw2_lower = Astring.String.Ascii.lowercase kw2 in 271 292 if kw2_lower <> "and" then Error "Parse Error." 272 293 else begin 273 294 skip_ws (); ··· 295 316 match String.index_opt content ':' with 296 317 | None -> 297 318 (* Just feature name - boolean feature or range syntax *) 298 - let feature_lower = String.lowercase_ascii content in 319 + let feature_lower = Astring.String.Ascii.lowercase content in 299 320 if List.mem feature_lower deprecated_media_features then 300 321 Error (Printf.sprintf "Deprecated media feature \"%s\". For guidance, see the Deprecated Media Features section in the current Media Queries specification." feature_lower) 301 322 else if List.mem feature_lower valid_media_features then ··· 305 326 | Some colon_pos -> 306 327 let feature = String.trim (String.sub content 0 colon_pos) in 307 328 let value = String.trim (String.sub content (colon_pos + 1) (String.length content - colon_pos - 1)) in 308 - let feature_lower = String.lowercase_ascii feature in 329 + let feature_lower = Astring.String.Ascii.lowercase feature in 309 330 310 331 (* Check for deprecated features *) 311 332 if List.mem feature_lower deprecated_media_features then ··· 341 362 if List.mem feature length_features then begin 342 363 (* Must be a valid length: number followed by unit *) 343 364 let value = String.trim value in 344 - let is_digit c = c >= '0' && c <= '9' in 345 365 346 366 (* Parse number - includes sign, digits, and decimal point *) 347 367 let i = ref 0 in 348 368 let len = String.length value in 349 - while !i < len && (is_digit value.[!i] || value.[!i] = '.' || value.[!i] = '-' || value.[!i] = '+') do 369 + while !i < len && (Datatype.is_ascii_digit value.[!i] || value.[!i] = '.' || value.[!i] = '-' || value.[!i] = '+') do 350 370 incr i 351 371 done; 352 372 let num_part = String.sub value 0 !i in ··· 367 387 else if unit_part = "" then 368 388 Error "only \"0\" can be a \"unit\". You must put a unit after your number" 369 389 else begin 370 - let unit_lower = String.lowercase_ascii unit_part in 390 + let unit_lower = Astring.String.Ascii.lowercase unit_part in 371 391 if List.mem unit_lower valid_length_units then Ok () 372 392 else if List.mem unit_lower valid_resolution_units then 373 393 Error (Printf.sprintf "\"%s\" is not a \"%s\" value" value base_feature) ··· 377 397 end else if List.mem feature color_features then begin 378 398 (* Must be an integer *) 379 399 let value = String.trim value in 380 - let is_digit c = c >= '0' && c <= '9' in 381 - if String.length value > 0 && String.for_all is_digit value then Ok () 400 + if Datatype.is_all_digits value then Ok () 382 401 else 383 402 Error (Printf.sprintf "\"%s\" is not a \"%s\" value" value base_feature) 384 403 end else
+1 -1
lib/check/datatype/dt_media_query.mli
··· 8 8 9 9 (** Media query validator. 10 10 11 - Validates CSS media queries used in media attributes and CSS @media rules. 11 + Validates CSS media queries used in media attributes and CSS [@@media] rules. 12 12 13 13 Examples: 14 14 - "screen"
+2 -3
lib/check/datatype/dt_mime.ml
··· 1 1 (** MIME type validation based on RFC 2045 and HTML5 spec *) 2 2 3 - (** Check if character is whitespace *) 4 - let is_whitespace c = c = ' ' || c = '\t' || c = '\n' || c = '\r' 3 + let is_whitespace = Datatype.is_whitespace 5 4 6 5 (** Check if character is a token character (RFC 2045) *) 7 6 let is_token_char c = ··· 92 91 if is_token_char c then parse In_subtype (i + 1) 93 92 else if c = ';' then 94 93 (* Check if this is a JavaScript MIME type *) 95 - let mime_type = String.sub s 0 i |> String.lowercase_ascii in 94 + let mime_type = String.sub s 0 i |> Astring.String.Ascii.lowercase in 96 95 if List.mem mime_type javascript_mime_types then 97 96 Error 98 97 "A JavaScript MIME type must not contain any characters after \
+1 -1
lib/check/datatype/dt_popover.ml
··· 4 4 let name = "popover" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "auto" | "manual" -> Ok () 10 10 | _ ->
+1 -1
lib/check/datatype/dt_preload.ml
··· 7 7 let name = "preload" 8 8 9 9 let validate s = 10 - let s_lower = Datatype.string_to_ascii_lowercase s in 10 + let s_lower = Astring.String.Ascii.lowercase s in 11 11 if List.mem s_lower valid_preloads then Ok () 12 12 else 13 13 Error
+1 -1
lib/check/datatype/dt_referrer.ml
··· 4 4 let name = "referrerpolicy" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" 10 10 | "no-referrer"
+1 -1
lib/check/datatype/dt_scope.ml
··· 7 7 let name = "scope" 8 8 9 9 let validate s = 10 - let s_lower = Datatype.string_to_ascii_lowercase s in 10 + let s_lower = Astring.String.Ascii.lowercase s in 11 11 if List.mem s_lower valid_scopes then Ok () 12 12 else 13 13 Error
+1 -1
lib/check/datatype/dt_shape.ml
··· 4 4 let name = "shape" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "default" | "rect" | "circle" | "poly" -> Ok () 10 10 | _ ->
+1 -1
lib/check/datatype/dt_spellcheck.ml
··· 4 4 let name = "spellcheck" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "true" | "false" -> Ok () 10 10 | _ ->
+1 -1
lib/check/datatype/dt_target.ml
··· 8 8 if String.length s = 0 then Error "Browsing context name must not be empty" 9 9 else if s.[0] = '_' then 10 10 (* If starts with underscore, must be a special keyword *) 11 - let lower = Datatype.string_to_ascii_lowercase s in 11 + let lower = Astring.String.Ascii.lowercase s in 12 12 if List.mem lower special_keywords then Ok () 13 13 else 14 14 Error
+1 -1
lib/check/datatype/dt_translate.ml
··· 4 4 let name = "translate" 5 5 6 6 let validate s = 7 - let s_lower = Datatype.string_to_ascii_lowercase s in 7 + let s_lower = Astring.String.Ascii.lowercase s in 8 8 match s_lower with 9 9 | "" | "yes" | "no" -> Ok () 10 10 | _ ->
+1 -1
lib/check/datatype/dt_url.ml
··· 30 30 match s.[i] with 31 31 | ':' -> 32 32 let scheme = 33 - String.sub s start (i - start) |> Datatype.string_to_ascii_lowercase 33 + String.sub s start (i - start) |> Astring.String.Ascii.lowercase 34 34 in 35 35 let rest = String.sub s (i + 1) (len - i - 1) in 36 36 Some (scheme, rest)
+1 -1
lib/check/datatype/dt_wrap.ml
··· 7 7 let name = "wrap" 8 8 9 9 let validate s = 10 - let s_lower = Datatype.string_to_ascii_lowercase s in 10 + let s_lower = Astring.String.Ascii.lowercase s in 11 11 if List.mem s_lower valid_wraps then Ok () 12 12 else 13 13 Error
+3 -3
lib/check/element/attr.ml
··· 571 571 572 572 (** Parse a single attribute name-value pair to typed attribute *) 573 573 let parse_attr name value : t = 574 - let name_lower = String.lowercase_ascii name in 575 - let value_lower = String.lowercase_ascii value in 574 + let name_lower = Astring.String.Ascii.lowercase name in 575 + let value_lower = Astring.String.Ascii.lowercase value in 576 576 match name_lower with 577 577 (* Global attributes *) 578 578 | "id" -> `Id value ··· 875 875 (** Get rel attribute as list of link types (space-separated, lowercased per HTML5 spec) *) 876 876 let get_rel_list attrs = 877 877 match get_rel attrs with 878 - | Some s -> List.map String.lowercase_ascii (Datatype.split_on_whitespace s) 878 + | Some s -> List.map Astring.String.Ascii.lowercase (Datatype.split_on_whitespace s) 879 879 | None -> [] 880 880 881 881 (** Get headers attribute as raw string *)
+4 -4
lib/check/element/element.ml
··· 21 21 22 22 (** Parse element-specific type attribute based on tag *) 23 23 let parse_type_attr (tag : Tag.html_tag) value : Attr.t = 24 - let value_lower = String.lowercase_ascii value in 24 + let value_lower = Astring.String.Ascii.lowercase value in 25 25 match tag with 26 26 | `Input -> 27 27 (match Attr.parse_input_type value_lower with ··· 42 42 (** Parse attributes with element context for type attribute *) 43 43 let parse_attrs_for_tag (tag : Tag.element_tag) (raw_attrs : (string * string) list) : Attr.t list = 44 44 List.map (fun (name, value) -> 45 - let name_lower = String.lowercase_ascii name in 45 + let name_lower = Astring.String.Ascii.lowercase name in 46 46 if name_lower = "type" then 47 47 match tag with 48 48 | Tag.Html html_tag -> parse_type_attr html_tag value ··· 274 274 (** Get raw attribute value (from original attrs) *) 275 275 let get_raw_attr name elem = 276 276 List.find_map (fun (n, v) -> 277 - if String.lowercase_ascii n = String.lowercase_ascii name then Some v else None 277 + if Astring.String.Ascii.lowercase n = Astring.String.Ascii.lowercase name then Some v else None 278 278 ) elem.raw_attrs 279 279 280 280 (** Check if raw attribute exists *) 281 281 let has_raw_attr name elem = 282 282 List.exists (fun (n, _) -> 283 - String.lowercase_ascii n = String.lowercase_ascii name 283 + Astring.String.Ascii.lowercase n = Astring.String.Ascii.lowercase name 284 284 ) elem.raw_attrs 285 285 286 286 (** {1 Pattern Matching Helpers} *)
+3 -3
lib/check/element/element.mli
··· 45 45 46 46 (** A typed HTML element. 47 47 48 - @field tag The element's tag classification 49 - @field attrs Typed attributes parsed from raw input 50 - @field raw_attrs Original attribute name-value pairs for fallback *) 48 + - [tag]: The element's tag classification 49 + - [attrs]: Typed attributes parsed from raw input 50 + - [raw_attrs]: Original attribute name-value pairs for fallback *) 51 51 type t = { 52 52 tag : Tag.element_tag; 53 53 attrs : Attr.t list;
+3 -3
lib/check/element/tag.ml
··· 234 234 (** Check if a name is a valid custom element name (contains hyphen, not reserved) *) 235 235 let is_custom_element_name name = 236 236 String.contains name '-' && 237 - not (String.starts_with ~prefix:"xml" (String.lowercase_ascii name)) && 238 - not (String.equal (String.lowercase_ascii name) "annotation-xml") 237 + not (String.starts_with ~prefix:"xml" (Astring.String.Ascii.lowercase name)) && 238 + not (String.equal (Astring.String.Ascii.lowercase name) "annotation-xml") 239 239 240 240 (** SVG namespace URI *) 241 241 let svg_namespace = "http://www.w3.org/2000/svg" ··· 255 255 256 256 (** Convert tag name and optional namespace to element_tag *) 257 257 let tag_of_string ?namespace name = 258 - let name_lower = String.lowercase_ascii name in 258 + let name_lower = Astring.String.Ascii.lowercase name in 259 259 match namespace with 260 260 | Some ns when is_svg_namespace ns -> Svg name (* Preserve original case for SVG *) 261 261 | Some ns when is_mathml_namespace ns -> MathML name (* Preserve original case for MathML *)
+7 -7
lib/check/htmlrw_check.mli
··· 21 21 22 22 {2 Handling Specific Errors} 23 23 24 - Use pattern matching on {!field-message.error_code} for fine-grained control: 24 + Use pattern matching on [error_code] for fine-grained control: 25 25 26 26 {[ 27 27 List.iter (fun msg -> ··· 188 188 189 189 (** Human-readable text format. 190 190 191 - {v 191 + {v 192 192 file.html:5.3: error [missing-alt]: Element "img" is missing required attribute "alt". 193 - v} *) 193 + v} *) 194 194 val to_text : t -> string 195 195 196 196 (** JSON format compatible with Nu HTML Validator. 197 197 198 - {v 198 + {v 199 199 {"messages":[{"type":"error","message":"...","firstLine":5,"firstColumn":3}]} 200 - v} *) 200 + v} *) 201 201 val to_json : t -> string 202 202 203 203 (** GNU error format for IDE integration. 204 204 205 - {v 205 + {v 206 206 file.html:5:3: error: Element "img" is missing required attribute "alt". 207 - v} *) 207 + v} *) 208 208 val to_gnu : t -> string 209 209 210 210
+6 -3
lib/check/message_format.ml
··· 90 90 91 91 Object (with_extract, Meta.none) 92 92 93 - let format_json ?system_id messages = 93 + let messages_to_json ?system_id messages = 94 94 let open Jsont in 95 95 let msg_array = Array (List.map (message_to_json ?system_id) messages, Meta.none) in 96 - let obj = Object ([ (("messages", Meta.none), msg_array) ], Meta.none) in 97 - match Jsont_bytesrw.encode_string ~format:Minify json obj with 96 + Object ([ (("messages", Meta.none), msg_array) ], Meta.none) 97 + 98 + let format_json ?system_id messages = 99 + let obj = messages_to_json ?system_id messages in 100 + match Jsont_bytesrw.encode_string ~format:Minify Jsont.json obj with 98 101 | Ok s -> s 99 102 | Error e -> failwith ("JSON encoding error: " ^ e)
+33
lib/check/message_format.mli
··· 26 26 27 27 @param system_id Optional default system identifier for messages without location. *) 28 28 val format_gnu : ?system_id:string -> Message.t list -> string 29 + 30 + (** {1 JSON Value Builders} 31 + 32 + These functions return [Jsont.json] values that can be reused 33 + for custom JSON encoding scenarios. *) 34 + 35 + (** Convert a single message to JSON AST. 36 + 37 + Produces JSON compatible with the Nu HTML Validator format: 38 + {[ 39 + { 40 + "type": "error", 41 + "message": "...", 42 + "subType": "error-code", 43 + "url": "...", 44 + "firstLine": 1, 45 + "firstColumn": 1, 46 + ... 47 + } 48 + ]} 49 + 50 + @param system_id Default system identifier for messages without location.system_id. *) 51 + val message_to_json : ?system_id:string -> Message.t -> Jsont.json 52 + 53 + (** Convert a message list to JSON AST with wrapper object. 54 + 55 + Produces JSON with a "messages" array: 56 + {[ 57 + { "messages": [...] } 58 + ]} 59 + 60 + @param system_id Default system identifier for messages without location.system_id. *) 61 + val messages_to_json : ?system_id:string -> Message.t list -> Jsont.json
+1 -1
lib/check/semantic/form_checker.ml
··· 12 12 13 13 (** Check if autocomplete value contains webauthn token *) 14 14 let contains_webauthn value = 15 - let lower = String.lowercase_ascii value in 15 + let lower = Astring.String.Ascii.lowercase value in 16 16 let tokens = String.split_on_char ' ' lower |> List.filter (fun s -> String.length s > 0) in 17 17 List.mem "webauthn" tokens 18 18
+2 -2
lib/check/semantic/lang_detecting_checker.ml
··· 54 54 let get_lang_code lang = 55 55 (* Extract primary language subtag *) 56 56 match String.split_on_char '-' lang with 57 - | code :: _ -> String.lowercase_ascii code 57 + | code :: _ -> Astring.String.Ascii.lowercase code 58 58 | [] -> "" 59 59 60 60 (* Create detector lazily with deterministic seed *) ··· 324 324 | None -> 325 325 Message_collector.add_typed collector 326 326 (`I18n (`Missing_dir_rtl (`Language detected_name))) 327 - | Some dir when String.lowercase_ascii dir <> "rtl" -> 327 + | Some dir when Astring.String.Ascii.lowercase dir <> "rtl" -> 328 328 Message_collector.add_typed collector 329 329 (`I18n (`Wrong_dir (`Language detected_name, `Declared dir))) 330 330 | _ -> ()
+11 -13
lib/check/semantic/nesting_checker.ml
··· 190 190 state.ancestor_flags <- empty_flags () 191 191 192 192 (** Get attribute value by name from attribute list. *) 193 - let get_attr attrs name = 194 - List.assoc_opt name attrs 193 + let get_attr = Attr_utils.get_attr 195 194 196 195 (** Check if an attribute exists. *) 197 - let has_attr attrs name = 198 - get_attr attrs name <> None 196 + let has_attr = Attr_utils.has_attr 199 197 200 198 (** Check if element is interactive based on its attributes. *) 201 199 let is_interactive_element name attrs = 202 200 match name with 203 - | "a" -> has_attr attrs "href" 204 - | "audio" | "video" -> has_attr attrs "controls" 205 - | "img" | "object" -> has_attr attrs "usemap" 201 + | "a" -> has_attr "href" attrs 202 + | "audio" | "video" -> has_attr "controls" attrs 203 + | "img" | "object" -> has_attr "usemap" attrs 206 204 | "input" -> 207 - (match get_attr attrs "type" with 205 + (match get_attr "type" attrs with 208 206 | Some "hidden" -> false 209 207 | _ -> true) 210 208 | "button" | "details" | "embed" | "iframe" | "label" | "select" ··· 239 237 (* Determine attribute to mention in error messages *) 240 238 let attr = 241 239 match name with 242 - | "a" when has_attr attrs "href" -> Some "href" 243 - | "audio" when has_attr attrs "controls" -> Some "controls" 244 - | "video" when has_attr attrs "controls" -> Some "controls" 245 - | "img" when has_attr attrs "usemap" -> Some "usemap" 246 - | "object" when has_attr attrs "usemap" -> Some "usemap" 240 + | "a" when has_attr "href" attrs -> Some "href" 241 + | "audio" when has_attr "controls" attrs -> Some "controls" 242 + | "video" when has_attr "controls" attrs -> Some "controls" 243 + | "img" when has_attr "usemap" attrs -> Some "usemap" 244 + | "object" when has_attr "usemap" attrs -> Some "usemap" 247 245 | _ -> None 248 246 in 249 247
+2 -2
lib/check/semantic/obsolete_checker.ml
··· 260 260 match element.Element.tag with 261 261 | Tag.Html _ -> 262 262 let name = Tag.tag_to_string element.tag in 263 - let name_lower = String.lowercase_ascii name in 263 + let name_lower = Astring.String.Ascii.lowercase name in 264 264 let attrs = element.raw_attrs in 265 265 266 266 (* Track head context *) ··· 275 275 276 276 (* Check for obsolete attributes *) 277 277 List.iter (fun (attr_name, _attr_value) -> 278 - let attr_lower = String.lowercase_ascii attr_name in 278 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 279 279 280 280 (* Special handling for scoped attribute on style *) 281 281 if attr_lower = "scoped" && name_lower = "style" then begin
+1 -1
lib/check/semantic/required_attr_checker.ml
··· 120 120 (* popover attribute must have valid value *) 121 121 match Attr_utils.get_attr "popover" attrs with 122 122 | Some value -> 123 - let value_lower = String.lowercase_ascii value in 123 + let value_lower = Astring.String.Ascii.lowercase value in 124 124 (* Valid values: empty string, auto, manual, hint *) 125 125 if value_lower <> "" && value_lower <> "auto" && value_lower <> "manual" && value_lower <> "hint" then 126 126 Message_collector.add_typed collector
+34 -34
lib/check/specialized/aria_checker.ml
··· 309 309 else 310 310 String.split_on_char ' ' trimmed 311 311 |> List.filter (fun s -> String.trim s <> "") 312 - |> List.map String.lowercase_ascii 312 + |> List.map Astring.String.Ascii.lowercase 313 313 314 314 (** Get the implicit role for an HTML element. *) 315 315 let get_implicit_role element_name attrs = 316 316 (* Check for input element with type attribute *) 317 317 if element_name = "input" then begin 318 - match List.assoc_opt "type" attrs with 318 + match Attr_utils.get_attr "type" attrs with 319 319 | Some input_type -> 320 - let input_type = String.lowercase_ascii input_type in 320 + let input_type = Astring.String.Ascii.lowercase input_type in 321 321 begin match Hashtbl.find_opt input_types_with_implicit_role input_type with 322 322 | Some role -> Some role 323 323 | None -> ··· 332 332 end 333 333 (* Check for area element - implicit role depends on href attribute *) 334 334 else if element_name = "area" then begin 335 - match List.assoc_opt "href" attrs with 335 + match Attr_utils.get_attr "href" attrs with 336 336 | Some _ -> Some "link" (* area with href has implicit role "link" *) 337 337 | None -> Some "generic" (* area without href has no corresponding role, treated as generic *) 338 338 end 339 339 (* Check for a element - implicit role depends on href attribute *) 340 340 else if element_name = "a" then begin 341 - match List.assoc_opt "href" attrs with 341 + match Attr_utils.get_attr "href" attrs with 342 342 | Some _ -> Some "link" (* a with href has implicit role "link" *) 343 343 | None -> Some "generic" (* a without href has no corresponding role, treated as generic *) 344 344 end ··· 430 430 match element.Element.tag with 431 431 | Tag.Html _ -> 432 432 let name = Tag.tag_to_string element.tag in 433 - let name_lower = String.lowercase_ascii name in 433 + let name_lower = Astring.String.Ascii.lowercase name in 434 434 let attrs = element.raw_attrs in 435 - let role_attr = List.assoc_opt "role" attrs in 436 - let aria_label = List.assoc_opt "aria-label" attrs in 437 - let aria_labelledby = List.assoc_opt "aria-labelledby" attrs in 438 - let aria_braillelabel = List.assoc_opt "aria-braillelabel" attrs in 435 + let role_attr = Attr_utils.get_attr "role" attrs in 436 + let aria_label = Attr_utils.get_attr "aria-label" attrs in 437 + let aria_labelledby = Attr_utils.get_attr "aria-labelledby" attrs in 438 + let aria_braillelabel = Attr_utils.get_attr "aria-braillelabel" attrs in 439 439 let has_aria_label = match aria_label with Some v -> String.trim v <> "" | None -> false in 440 440 let has_aria_labelledby = match aria_labelledby with Some v -> String.trim v <> "" | None -> false in 441 441 let has_aria_braillelabel = match aria_braillelabel with Some v -> String.trim v <> "" | None -> false in ··· 459 459 460 460 (* Track active tabs and tabpanel roles for end_document validation *) 461 461 if List.mem "tab" explicit_roles then begin 462 - let aria_selected = List.assoc_opt "aria-selected" attrs in 462 + let aria_selected = Attr_utils.get_attr "aria-selected" attrs in 463 463 if aria_selected = Some "true" then state.has_active_tab <- true 464 464 end; 465 465 if List.mem "tabpanel" explicit_roles then state.has_tabpanel <- true; 466 466 467 467 (* Track visible main elements (explicit role=main or implicit main role) *) 468 468 let is_hidden = 469 - let aria_hidden = List.assoc_opt "aria-hidden" attrs in 469 + let aria_hidden = Attr_utils.get_attr "aria-hidden" attrs in 470 470 aria_hidden = Some "true" 471 471 in 472 472 if not is_hidden then begin ··· 489 489 (* Check br/wbr aria-* attribute restrictions - not allowed *) 490 490 if name_lower = "br" || name_lower = "wbr" then begin 491 491 List.iter (fun (attr_name, _) -> 492 - let attr_lower = String.lowercase_ascii attr_name in 492 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 493 493 if String.starts_with ~prefix:"aria-" attr_lower && 494 494 attr_lower <> "aria-hidden" then 495 495 Message_collector.add_typed collector ··· 515 515 516 516 (* Check for img with empty alt having role attribute *) 517 517 if name_lower = "img" then begin 518 - let alt_value = List.assoc_opt "alt" attrs in 518 + let alt_value = Attr_utils.get_attr "alt" attrs in 519 519 match alt_value with 520 520 | Some alt when String.trim alt = "" -> 521 521 (* img with empty alt must not have role attribute *) ··· 526 526 527 527 (* Check for input[type=checkbox][role=button] requires aria-pressed *) 528 528 if name_lower = "input" then begin 529 - let input_type = match List.assoc_opt "type" attrs with 530 - | Some t -> String.lowercase_ascii t 529 + let input_type = match Attr_utils.get_attr "type" attrs with 530 + | Some t -> Astring.String.Ascii.lowercase t 531 531 | None -> "text" 532 532 in 533 533 if input_type = "checkbox" && List.mem "button" explicit_roles then begin 534 - let has_aria_pressed = List.assoc_opt "aria-pressed" attrs <> None in 534 + let has_aria_pressed = Attr_utils.has_attr "aria-pressed" attrs in 535 535 if not has_aria_pressed then 536 536 Message_collector.add_typed collector (`Input `Checkbox_needs_aria_pressed) 537 537 end ··· 566 566 567 567 (* Check for aria-hidden="true" on body element *) 568 568 if name_lower = "body" then begin 569 - let aria_hidden = List.assoc_opt "aria-hidden" attrs in 569 + let aria_hidden = Attr_utils.get_attr "aria-hidden" attrs in 570 570 match aria_hidden with 571 571 | Some "true" -> 572 572 Message_collector.add_typed collector (`Aria `Hidden_on_body) ··· 574 574 end; 575 575 576 576 (* Check for aria-checked on input[type=checkbox] *) 577 - let aria_checked = List.assoc_opt "aria-checked" attrs in 577 + let aria_checked = Attr_utils.get_attr "aria-checked" attrs in 578 578 if name_lower = "input" then begin 579 - match List.assoc_opt "type" attrs with 580 - | Some input_type when String.lowercase_ascii input_type = "checkbox" -> 579 + match Attr_utils.get_attr "type" attrs with 580 + | Some input_type when Astring.String.Ascii.lowercase input_type = "checkbox" -> 581 581 if aria_checked <> None then 582 582 Message_collector.add_typed collector 583 583 (`Aria (`Must_not_use (`Attr "aria-checked", `Elem "input", ··· 586 586 end; 587 587 588 588 (* Check for aria-expanded on roles that don't support it *) 589 - let aria_expanded = List.assoc_opt "aria-expanded" attrs in 589 + let aria_expanded = Attr_utils.get_attr "aria-expanded" attrs in 590 590 if aria_expanded <> None then begin 591 591 let role_to_check = match explicit_roles with 592 592 | first :: _ -> Some first ··· 605 605 (* Special message for input[type=text] with role="textbox" *) 606 606 let reason = 607 607 if name_lower = "input" && first_role = "textbox" then begin 608 - let has_list = List.exists (fun (k, _) -> String.lowercase_ascii k = "list") attrs in 609 - let input_type = match List.assoc_opt "type" attrs with 610 - | Some t -> String.lowercase_ascii t 608 + let has_list = Attr_utils.has_attr "list" attrs in 609 + let input_type = match Attr_utils.get_attr "type" attrs with 610 + | Some t -> Astring.String.Ascii.lowercase t 611 611 | None -> "text" 612 612 in 613 613 if not has_list && input_type = "text" then ··· 671 671 672 672 (* Check for redundant default ARIA attribute values *) 673 673 List.iter (fun (attr_name, attr_value) -> 674 - let attr_lower = String.lowercase_ascii attr_name in 674 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 675 675 if String.starts_with ~prefix:"aria-" attr_lower then 676 676 match Hashtbl.find_opt aria_default_values attr_lower with 677 677 | Some default_value -> 678 - let value_lower = String.lowercase_ascii (String.trim attr_value) in 678 + let value_lower = Astring.String.Ascii.lowercase (String.trim attr_value) in 679 679 if value_lower = default_value then 680 680 Message_collector.add_typed collector 681 681 (`Generic (Printf.sprintf ··· 688 688 if name_lower = "summary" then begin 689 689 let parent = get_parent_element state in 690 690 let is_in_details = parent = Some "details" in 691 - let has_role_attr = List.exists (fun (k, _) -> String.lowercase_ascii k = "role") attrs in 692 - let has_aria_expanded = List.assoc_opt "aria-expanded" attrs <> None in 693 - let has_aria_pressed = List.assoc_opt "aria-pressed" attrs <> None in 691 + let has_role_attr = Attr_utils.has_attr "role" attrs in 692 + let has_aria_expanded = Attr_utils.has_attr "aria-expanded" attrs in 693 + let has_aria_pressed = Attr_utils.has_attr "aria-pressed" attrs in 694 694 if is_in_details then begin 695 695 (* summary that is the first child of details *) 696 696 if has_role_attr then ··· 726 726 (* Custom elements (autonomous custom elements) have generic role by default 727 727 and cannot have accessible names unless they have an explicit role *) 728 728 let attrs = element.raw_attrs in 729 - let role_attr = List.assoc_opt "role" attrs in 730 - let aria_label = List.assoc_opt "aria-label" attrs in 731 - let aria_labelledby = List.assoc_opt "aria-labelledby" attrs in 732 - let aria_braillelabel = List.assoc_opt "aria-braillelabel" attrs in 729 + let role_attr = Attr_utils.get_attr "role" attrs in 730 + let aria_label = Attr_utils.get_attr "aria-label" attrs in 731 + let aria_labelledby = Attr_utils.get_attr "aria-labelledby" attrs in 732 + let aria_braillelabel = Attr_utils.get_attr "aria-braillelabel" attrs in 733 733 let has_aria_label = match aria_label with Some v -> String.trim v <> "" | None -> false in 734 734 let has_aria_labelledby = match aria_labelledby with Some v -> String.trim v <> "" | None -> false in 735 735 let has_aria_braillelabel = match aria_braillelabel with Some v -> String.trim v <> "" | None -> false in
+21 -21
lib/check/specialized/attr_restrictions_checker.ml
··· 58 58 match element.Element.tag with 59 59 | Tag.Html _ -> 60 60 let name = Tag.tag_to_string element.tag in 61 - let name_lower = String.lowercase_ascii name in 61 + let name_lower = Astring.String.Ascii.lowercase name in 62 62 let attrs = element.raw_attrs in 63 63 64 64 (* Detect XHTML mode from xmlns attribute on html element *) ··· 86 86 (* Check for xmlns:* prefixed attributes - not allowed in HTML *) 87 87 (* Standard xmlns declarations are allowed but custom prefixes are not *) 88 88 List.iter (fun (attr_name, _) -> 89 - let attr_lower = String.lowercase_ascii attr_name in 89 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 90 90 if String.starts_with ~prefix:"xmlns:" attr_lower then begin 91 91 let prefix = String.sub attr_lower 6 (String.length attr_lower - 6) in 92 92 (* Only xmlns:xlink (with correct value) and xmlns:xml are allowed *) ··· 113 113 (* Validate style type attribute - must be "text/css" or omitted *) 114 114 if name_lower = "style" then begin 115 115 List.iter (fun (attr_name, attr_value) -> 116 - let attr_lower = String.lowercase_ascii attr_name in 116 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 117 117 if attr_lower = "type" then begin 118 - let value_lower = String.lowercase_ascii (String.trim attr_value) in 118 + let value_lower = Astring.String.Ascii.lowercase (String.trim attr_value) in 119 119 if value_lower <> "text/css" then 120 120 Message_collector.add_typed collector (`Misc `Style_type_invalid) 121 121 end ··· 144 144 (* imagesrcset requires as="image" *) 145 145 if has_imagesrcset then begin 146 146 let as_is_image = match as_value with 147 - | Some v -> String.lowercase_ascii (String.trim v) = "image" 147 + | Some v -> Astring.String.Ascii.lowercase (String.trim v) = "image" 148 148 | None -> false 149 149 in 150 150 if not as_is_image then ··· 164 164 (* Validate img usemap attribute - must be hash-name reference with content *) 165 165 if name_lower = "img" then begin 166 166 List.iter (fun (attr_name, attr_value) -> 167 - let attr_lower = String.lowercase_ascii attr_name in 167 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 168 168 if attr_lower = "usemap" then begin 169 169 if attr_value = "#" then 170 170 Message_collector.add_typed collector ··· 178 178 (* Validate embed type attribute - must be valid MIME type *) 179 179 if name_lower = "embed" then begin 180 180 List.iter (fun (attr_name, attr_value) -> 181 - let attr_lower = String.lowercase_ascii attr_name in 181 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 182 182 if attr_lower = "type" then begin 183 183 match Dt_mime.validate_mime_type attr_value with 184 184 | Ok () -> () ··· 197 197 name_lower = "iframe" || name_lower = "source" in 198 198 if is_dimension_element then begin 199 199 List.iter (fun (attr_name, attr_value) -> 200 - let attr_lower = String.lowercase_ascii attr_name in 200 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 201 201 if attr_lower = "width" || attr_lower = "height" then begin 202 202 (* Check for non-negative integer only *) 203 203 let is_valid = ··· 245 245 (* Validate area[shape=default] cannot have coords *) 246 246 if name_lower = "area" then begin 247 247 match Attr_utils.get_attr "shape" attrs with 248 - | Some s when String.lowercase_ascii (String.trim s) = "default" -> 248 + | Some s when Astring.String.Ascii.lowercase (String.trim s) = "default" -> 249 249 if Attr_utils.has_attr "coords" attrs then 250 250 Message_collector.add_typed collector 251 251 (`Attr (`Not_allowed (`Attr "coords", `Elem "area"))) ··· 257 257 match Attr_utils.get_attr "dir" attrs with 258 258 | None -> 259 259 Message_collector.add_typed collector (`Misc `Bdo_missing_dir) 260 - | Some v when String.lowercase_ascii (String.trim v) = "auto" -> 260 + | Some v when Astring.String.Ascii.lowercase (String.trim v) = "auto" -> 261 261 Message_collector.add_typed collector (`Misc `Bdo_dir_auto) 262 262 | _ -> () 263 263 end; ··· 266 266 if name_lower = "input" then begin 267 267 if Attr_utils.has_attr "list" attrs then begin 268 268 let input_type = Attr_utils.get_attr_or "type" ~default:"text" attrs 269 - |> String.trim |> String.lowercase_ascii in 269 + |> String.trim |> Astring.String.Ascii.lowercase in 270 270 if not (List.mem input_type input_types_allowing_list) then 271 271 Message_collector.add_typed collector (`Input `List_not_allowed) 272 272 end ··· 274 274 275 275 (* Validate data-* attributes *) 276 276 List.iter (fun (attr_name, _) -> 277 - let attr_lower = String.lowercase_ascii attr_name in 277 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 278 278 (* Check if it starts with "data-" *) 279 279 if String.starts_with ~prefix:"data-" attr_lower then begin 280 280 let after_prefix = String.sub attr_lower 5 (String.length attr_lower - 5) in ··· 297 297 (match lang_value with 298 298 | None -> 299 299 Message_collector.add_typed collector (`I18n `Xml_lang_without_lang) 300 - | Some lang when String.lowercase_ascii lang <> String.lowercase_ascii xmllang -> 300 + | Some lang when Astring.String.Ascii.lowercase lang <> Astring.String.Ascii.lowercase xmllang -> 301 301 Message_collector.add_typed collector (`I18n `Xml_lang_without_lang) 302 302 | _ -> ()) 303 303 | None -> () ··· 305 305 306 306 (* Validate spellcheck attribute - must be "true" or "false" or empty *) 307 307 List.iter (fun (attr_name, attr_value) -> 308 - let attr_lower = String.lowercase_ascii attr_name in 308 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 309 309 if attr_lower = "spellcheck" then begin 310 - let value_lower = String.lowercase_ascii (String.trim attr_value) in 310 + let value_lower = Astring.String.Ascii.lowercase (String.trim attr_value) in 311 311 if value_lower <> "" && value_lower <> "true" && value_lower <> "false" then 312 312 Message_collector.add_typed collector 313 313 (`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason ""))) ··· 317 317 (* Validate enterkeyhint attribute - must be one of specific values *) 318 318 let valid_enterkeyhint = ["enter"; "done"; "go"; "next"; "previous"; "search"; "send"] in 319 319 List.iter (fun (attr_name, attr_value) -> 320 - let attr_lower = String.lowercase_ascii attr_name in 320 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 321 321 if attr_lower = "enterkeyhint" then begin 322 - let value_lower = String.lowercase_ascii (String.trim attr_value) in 322 + let value_lower = Astring.String.Ascii.lowercase (String.trim attr_value) in 323 323 if not (List.mem value_lower valid_enterkeyhint) then 324 324 Message_collector.add_typed collector 325 325 (`Attr (`Bad_value (`Elem name, `Attr attr_name, `Value attr_value, `Reason ""))) ··· 328 328 329 329 (* Validate headingoffset attribute - must be a number between 0 and 8 *) 330 330 List.iter (fun (attr_name, attr_value) -> 331 - let attr_lower = String.lowercase_ascii attr_name in 331 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 332 332 if attr_lower = "headingoffset" then begin 333 333 let trimmed = String.trim attr_value in 334 334 let is_valid = ··· 346 346 347 347 (* Validate accesskey attribute - each key label must be a single code point *) 348 348 List.iter (fun (attr_name, attr_value) -> 349 - let attr_lower = String.lowercase_ascii attr_name in 349 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 350 350 if attr_lower = "accesskey" then begin 351 351 (* Split by whitespace to get key labels *) 352 352 let keys = String.split_on_char ' ' attr_value |> ··· 418 418 let is_media_element = name_lower = "link" || name_lower = "style" || name_lower = "source" in 419 419 if is_media_element then begin 420 420 List.iter (fun (attr_name, attr_value) -> 421 - let attr_lower = String.lowercase_ascii attr_name in 421 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 422 422 if attr_lower = "media" then begin 423 423 let trimmed = String.trim attr_value in 424 424 if trimmed <> "" then begin ··· 436 436 437 437 (* Validate RDFa prefix attribute - space-separated list of prefix:iri pairs *) 438 438 List.iter (fun (attr_name, attr_value) -> 439 - let attr_lower = String.lowercase_ascii attr_name in 439 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 440 440 if attr_lower = "prefix" then begin 441 441 (* Parse prefix attribute value - format: "prefix1: iri1 prefix2: iri2 ..." *) 442 442 let trimmed = String.trim attr_value in
+1 -1
lib/check/specialized/datetime_checker.ml
··· 451 451 if List.mem name datetime_elements then begin 452 452 (* Check for datetime attribute *) 453 453 let datetime_attr = List.find_map (fun (k, v) -> 454 - if String.lowercase_ascii k = "datetime" then Some v else None 454 + if Astring.String.Ascii.lowercase k = "datetime" then Some v else None 455 455 ) element.raw_attrs in 456 456 match datetime_attr with 457 457 | None -> ()
+1 -1
lib/check/specialized/dl_checker.ml
··· 106 106 (`Element (`Not_allowed_as_child (`Child "div", `Parent "dl"))); 107 107 (match Attr.get_role element.attrs with 108 108 | Some role_value -> 109 - let role_lower = String.lowercase_ascii (String.trim role_value) in 109 + let role_lower = Astring.String.Ascii.lowercase (String.trim role_value) in 110 110 if role_lower <> "presentation" && role_lower <> "none" then 111 111 Message_collector.add_typed collector (`Li_role `Div_in_dl_bad_role) 112 112 | None -> ());
-37
lib/check/specialized/h1_checker.ml
··· 1 - (** H1 element counter - warns about multiple h1 elements in a document. *) 2 - 3 - type state = { 4 - mutable h1_count : int; 5 - mutable svg_depth : int; (* Track depth inside SVG *) 6 - } 7 - 8 - let create () = { 9 - h1_count = 0; 10 - svg_depth = 0; 11 - } 12 - 13 - let reset state = 14 - state.h1_count <- 0; 15 - state.svg_depth <- 0 16 - 17 - let start_element state ~element collector = 18 - (* Track SVG depth - h1 inside SVG (foreignObject, desc) shouldn't count *) 19 - match element.Element.tag with 20 - | Tag.Svg _ -> 21 - state.svg_depth <- state.svg_depth + 1 22 - | Tag.Html `H1 when state.svg_depth = 0 -> 23 - state.h1_count <- state.h1_count + 1; 24 - if state.h1_count > 1 then 25 - Message_collector.add_typed collector (`Misc `Multiple_h1) 26 - | Tag.Html _ when state.svg_depth = 0 -> 27 - () (* Other HTML elements outside SVG *) 28 - | _ -> 29 - () (* Non-HTML or inside SVG *) 30 - 31 - let end_element state ~tag _collector = 32 - match tag with 33 - | Tag.Svg _ when state.svg_depth > 0 -> 34 - state.svg_depth <- state.svg_depth - 1 35 - | _ -> () 36 - 37 - let checker = Checker.make ~create ~reset ~start_element ~end_element ()
-16
lib/check/specialized/h1_checker.mli
··· 1 - (** H1 element counter checker. 2 - 3 - This checker validates that documents don't have multiple h1 elements, 4 - which can confuse document structure and accessibility tools. 5 - 6 - {2 Validation Rules} 7 - 8 - - Documents should have at most one [<h1>] element 9 - - [<h1>] elements inside SVG content (foreignObject, desc) are not counted 10 - 11 - {2 Error Messages} 12 - 13 - - [Multiple_h1]: Document contains more than one h1 element *) 14 - 15 - val checker : Checker.t 16 - (** The H1 checker instance. *)
+19 -107
lib/check/specialized/heading_checker.ml
··· 1 1 (** Heading structure validation checker. 2 2 3 3 This checker validates that: 4 - - Heading levels don't skip (e.g., h1 to h3) 5 - - Documents have at least one heading 6 - - Multiple h1 usage is noted 7 - - Headings are not empty *) 4 + - Multiple h1 usage is reported as an error 5 + 6 + Note: Additional accessibility checks (first heading should be h1, skipped 7 + levels, empty headings) are intentionally not included as errors since they 8 + are recommendations rather than HTML5 spec requirements. *) 8 9 9 10 (** Checker state tracking heading structure. *) 10 11 type state = { 11 - mutable current_level : int option; 12 12 mutable h1_count : int; 13 - mutable has_any_heading : bool; 14 - mutable first_heading_checked : bool; 15 - mutable in_heading : Tag.html_tag option; 16 - mutable heading_has_text : bool; 17 13 mutable svg_depth : int; (* Track depth inside SVG - headings in SVG don't count *) 18 14 } 19 15 20 - let create () = 21 - { 22 - current_level = None; 23 - h1_count = 0; 24 - has_any_heading = false; 25 - first_heading_checked = false; 26 - in_heading = None; 27 - heading_has_text = false; 28 - svg_depth = 0; 29 - } 16 + let create () = { 17 + h1_count = 0; 18 + svg_depth = 0; 19 + } 30 20 31 21 let reset state = 32 - state.current_level <- None; 33 22 state.h1_count <- 0; 34 - state.has_any_heading <- false; 35 - state.first_heading_checked <- false; 36 - state.in_heading <- None; 37 - state.heading_has_text <- false; 38 23 state.svg_depth <- 0 39 24 40 - (** Check if text is effectively empty (only whitespace). *) 41 - let is_empty_text text = 42 - let rec check i = 43 - if i >= String.length text then 44 - true 45 - else 46 - match text.[i] with 47 - | ' ' | '\t' | '\n' | '\r' -> check (i + 1) 48 - | _ -> false 49 - in 50 - check 0 51 - 52 25 let start_element state ~element collector = 53 26 match element.Element.tag with 54 27 | Tag.Svg _ -> 55 - (* Track SVG depth - headings inside SVG (foreignObject, desc) don't count *) 28 + (* Track SVG depth - h1 inside SVG (foreignObject, desc) shouldn't count *) 56 29 state.svg_depth <- state.svg_depth + 1 57 - | Tag.Html (#Tag.heading_tag as h) when state.svg_depth = 0 -> 58 - let level = match Tag.heading_level h with Some l -> l | None -> 0 in 59 - let name = Tag.html_tag_to_string h in 60 - state.has_any_heading <- true; 61 - 62 - (* Check if this is the first heading *) 63 - if not state.first_heading_checked then begin 64 - state.first_heading_checked <- true; 65 - if level <> 1 then 66 - Message_collector.add_typed collector 67 - (`Generic (Printf.sprintf 68 - "First heading in document is <%s>, should typically be <h1>" name)) 69 - end; 70 - 71 - (* Track h1 count *) 72 - if level = 1 then begin 73 - state.h1_count <- state.h1_count + 1; 74 - if state.h1_count > 1 then 75 - Message_collector.add_typed collector (`Misc `Multiple_h1) 76 - end; 77 - 78 - (* Check for skipped levels *) 79 - begin match state.current_level with 80 - | None -> 81 - state.current_level <- Some level 82 - | Some prev_level -> 83 - let diff = level - prev_level in 84 - if diff > 1 then 85 - Message_collector.add_typed collector 86 - (`Generic (Printf.sprintf 87 - "Heading level skipped: <%s> follows <h%d>, skipping %d level%s. This can confuse screen reader users" 88 - name prev_level (diff - 1) (if diff > 2 then "s" else ""))); 89 - state.current_level <- Some level 90 - end; 91 - 92 - (* Track that we're in a heading to check for empty content *) 93 - state.in_heading <- Some h; 94 - state.heading_has_text <- false 30 + | Tag.Html `H1 when state.svg_depth = 0 -> 31 + state.h1_count <- state.h1_count + 1; 32 + if state.h1_count > 1 then 33 + Message_collector.add_typed collector (`Misc `Multiple_h1) 95 34 | _ -> () 96 35 97 - let end_element state ~tag collector = 98 - (* Track SVG depth *) 99 - (match tag with 100 - | Tag.Svg _ when state.svg_depth > 0 -> 101 - state.svg_depth <- state.svg_depth - 1 102 - | _ -> ()); 103 - (* Check for empty headings *) 104 - match state.in_heading, tag with 105 - | Some h, Tag.Html h2 when h = h2 -> 106 - if not state.heading_has_text then 107 - Message_collector.add_typed collector 108 - (`Generic (Printf.sprintf 109 - "Heading <%s> is empty or contains only whitespace. Empty headings are problematic for screen readers" 110 - (Tag.html_tag_to_string h))); 111 - state.in_heading <- None; 112 - state.heading_has_text <- false 36 + let end_element state ~tag _collector = 37 + match tag with 38 + | Tag.Svg _ when state.svg_depth > 0 -> 39 + state.svg_depth <- state.svg_depth - 1 113 40 | _ -> () 114 41 115 - let characters state text _collector = 116 - (* If we're inside a heading, check if this text is non-whitespace *) 117 - match state.in_heading with 118 - | Some _ -> 119 - if not (is_empty_text text) then 120 - state.heading_has_text <- true 121 - | None -> 122 - () 123 - 124 - let end_document state collector = 125 - if not state.has_any_heading then 126 - Message_collector.add_typed collector 127 - (`Generic "Document contains no heading elements (h1-h6). Headings provide important document structure for accessibility") 128 - 129 - let checker = Checker.make ~create ~reset ~start_element ~end_element 130 - ~characters ~end_document () 42 + let checker = Checker.make ~create ~reset ~start_element ~end_element ()
+2 -2
lib/check/specialized/importmap_checker.ml
··· 270 270 | Tag.Html `Script -> 271 271 (* Check if type="importmap" *) 272 272 let type_attr = List.find_opt (fun (n, _) -> 273 - String.lowercase_ascii n = "type" 273 + Astring.String.Ascii.lowercase n = "type" 274 274 ) element.raw_attrs in 275 275 (match type_attr with 276 - | Some (_, v) when String.lowercase_ascii v = "importmap" -> 276 + | Some (_, v) when Astring.String.Ascii.lowercase v = "importmap" -> 277 277 state.in_importmap <- true; 278 278 Buffer.clear state.content 279 279 | _ -> ())
+1 -1
lib/check/specialized/label_checker.ml
··· 65 65 | _ -> ()) 66 66 67 67 | Tag.Html tag -> 68 - let name_lower = String.lowercase_ascii (Tag.tag_to_string (Tag.Html tag)) in 68 + let name_lower = Astring.String.Ascii.lowercase (Tag.tag_to_string (Tag.Html tag)) in 69 69 70 70 (* Track labelable element IDs *) 71 71 (if is_labelable name_lower then
+1 -1
lib/check/specialized/language_checker.ml
··· 27 27 28 28 (** Check if a language tag contains deprecated subtags. *) 29 29 let check_deprecated_tag value = 30 - let lower = String.lowercase_ascii value in 30 + let lower = Astring.String.Ascii.lowercase value in 31 31 let subtags = String.split_on_char '-' lower in 32 32 match subtags with 33 33 | [] -> None
+3 -6
lib/check/specialized/mime_type_checker.ml
··· 153 153 let create () = () 154 154 let reset _state = () 155 155 156 - let get_attr_value name attrs = 157 - List.find_map (fun (k, v) -> 158 - if String.lowercase_ascii k = String.lowercase_ascii name then Some v else None 159 - ) attrs 156 + let get_attr_value = Attr_utils.get_attr 160 157 161 158 let start_element _state ~element collector = 162 159 match element.Element.tag with 163 160 | Tag.Html tag -> 164 161 let name = Tag.html_tag_to_string tag in 165 - let name_lower = String.lowercase_ascii name in 162 + let name_lower = Astring.String.Ascii.lowercase name in 166 163 (match List.assoc_opt name_lower mime_type_attrs with 167 164 | None -> () 168 165 | Some type_attrs -> ··· 174 171 if value = "" then () 175 172 else if name_lower = "script" then 176 173 (* script type can be module, importmap, etc. - skip validation for non-MIME types *) 177 - let value_lower = String.lowercase_ascii value in 174 + let value_lower = Astring.String.Ascii.lowercase value in 178 175 if value_lower = "module" || value_lower = "importmap" || 179 176 not (String.contains value '/') then () 180 177 else
+2 -2
lib/check/specialized/picture_checker.ml
··· 133 133 let media_value = Attr_utils.get_attr "media" attrs in 134 134 let has_type = Attr_utils.has_attr "type" attrs in 135 135 let is_media_all = match media_value with 136 - | Some v -> String.lowercase_ascii (String.trim v) = "all" 136 + | Some v -> Astring.String.Ascii.lowercase (String.trim v) = "all" 137 137 | None -> false in 138 138 let is_media_empty = match media_value with 139 139 | Some v -> String.trim v = "" ··· 142 142 | None -> not has_type 143 143 | Some v -> 144 144 let trimmed = String.trim v in 145 - trimmed = "" || String.lowercase_ascii trimmed = "all" 145 + trimmed = "" || Astring.String.Ascii.lowercase trimmed = "all" 146 146 in 147 147 if is_always_matching then begin 148 148 state.has_always_matching_source <- true;
+12 -12
lib/check/specialized/srcset_sizes_checker.ml
··· 153 153 154 154 (** Check if scientific notation has invalid exponent (like 1e+1.5 - decimal in exponent) *) 155 155 let has_invalid_scientific_notation s = 156 - let lower = String.lowercase_ascii s in 156 + let lower = Astring.String.Ascii.lowercase s in 157 157 (* Find 'e' for scientific notation *) 158 158 match String.index_opt lower 'e' with 159 159 | None -> false ··· 176 176 (* Check for % at the end *) 177 177 else if trimmed.[len - 1] = '%' then "%" 178 178 else begin 179 - let lower = String.lowercase_ascii trimmed in 179 + let lower = Astring.String.Ascii.lowercase trimmed in 180 180 (* Try to find a unit at the end (letters only) *) 181 181 let rec find_unit_length i = 182 182 if i < 0 then 0 ··· 205 205 if has_invalid_scientific_notation value_no_comments then BadScientificNotation 206 206 (* "auto" is only valid with lazy loading, which requires checking the element context. 207 207 For general validation, treat "auto" alone as invalid in sizes. *) 208 - else if String.lowercase_ascii value_no_comments = "auto" then 208 + else if Astring.String.Ascii.lowercase value_no_comments = "auto" then 209 209 BadCssNumber (value_no_comments.[0], trimmed) 210 210 else if value_no_comments = "" then InvalidUnit ("", trimmed) 211 211 else begin 212 - let lower = String.lowercase_ascii value_no_comments in 212 + let lower = Astring.String.Ascii.lowercase value_no_comments in 213 213 (* Check for calc() or other CSS functions first - these are always valid *) 214 214 if String.contains value_no_comments '(' then Valid 215 215 else begin ··· 310 310 Some "Bad media condition: Parse Error" 311 311 end else begin 312 312 (* Check for bare "all" which is invalid *) 313 - let lower = String.lowercase_ascii trimmed in 313 + let lower = Astring.String.Ascii.lowercase trimmed in 314 314 let parts = String.split_on_char ' ' lower |> List.filter (fun s -> s <> "") in 315 315 match parts with 316 316 | keyword :: _ when keyword = "all" -> ··· 358 358 end 359 359 else begin 360 360 (* Check if remaining starts with "and", "or", "not" followed by space or paren *) 361 - let lower_remaining = String.lowercase_ascii remaining in 361 + let lower_remaining = Astring.String.Ascii.lowercase remaining in 362 362 if remaining_len >= 4 && String.sub lower_remaining 0 4 = "and " then 363 363 skip_media_condition (i + (len - i) - remaining_len + 4) 364 364 else if remaining_len >= 3 && String.sub lower_remaining 0 3 = "or " then ··· 577 577 578 578 (** Validate srcset descriptor *) 579 579 let validate_srcset_descriptor desc element_name srcset_value has_sizes collector = 580 - let desc_lower = String.lowercase_ascii (String.trim desc) in 580 + let desc_lower = Astring.String.Ascii.lowercase (String.trim desc) in 581 581 if String.length desc_lower = 0 then true 582 582 else begin 583 583 let last_char = desc_lower.[String.length desc_lower - 1] in ··· 723 723 724 724 (** Normalize descriptor for duplicate detection (e.g., 1x = 1.0x) *) 725 725 let normalize_descriptor desc = 726 - let desc_lower = String.lowercase_ascii (String.trim desc) in 726 + let desc_lower = Astring.String.Ascii.lowercase (String.trim desc) in 727 727 if String.length desc_lower = 0 then desc_lower 728 728 else 729 729 let last_char = desc_lower.[String.length desc_lower - 1] in ··· 793 793 (* Special schemes that require host/content after :// *) 794 794 let special_schemes = ["http"; "https"; "ftp"; "ws"; "wss"] in 795 795 (* Check for scheme-only URL like "http:" *) 796 - let url_lower = String.lowercase_ascii url in 796 + let url_lower = Astring.String.Ascii.lowercase url in 797 797 List.iter (fun scheme -> 798 798 let scheme_colon = scheme ^ ":" in 799 799 if url_lower = scheme_colon then ··· 824 824 (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: Expected single descriptor but found extraneous descriptor %s at %s." (q value) (q "srcset") (q element_name) (q extra_desc) (q value))))) 825 825 end; 826 826 827 - let desc_lower = String.lowercase_ascii (String.trim desc) in 827 + let desc_lower = Astring.String.Ascii.lowercase (String.trim desc) in 828 828 if String.length desc_lower > 0 then begin 829 829 let last_char = desc_lower.[String.length desc_lower - 1] in 830 830 if last_char = 'w' then has_w_descriptor := true ··· 872 872 begin match Hashtbl.find_opt seen_descriptors normalized with 873 873 | Some first_url -> 874 874 Message_collector.add_typed collector 875 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (String.lowercase_ascii dup_type) (q first_url))))) 875 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (Astring.String.Ascii.lowercase dup_type) (q first_url))))) 876 876 | None -> 877 877 begin match (if is_1x then Hashtbl.find_opt seen_descriptors "implicit-1x" else None) with 878 878 | Some first_url -> 879 879 (* Explicit 1x conflicts with implicit 1x *) 880 880 Message_collector.add_typed collector 881 - (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (String.lowercase_ascii dup_type) (q first_url))))) 881 + (`Attr (`Bad_value_generic (`Message (Printf.sprintf "Bad value %s for attribute %s on element %s: %s for image %s is identical to %s for image %s." (q value) (q "srcset") (q element_name) dup_type (q url) (Astring.String.Ascii.lowercase dup_type) (q first_url))))) 882 882 | None -> 883 883 Hashtbl.add seen_descriptors normalized url; 884 884 if is_1x then Hashtbl.add seen_descriptors "explicit-1x" url
+10 -10
lib/check/specialized/svg_checker.ml
··· 260 260 261 261 (* Check if attribute name matches a pattern like "data-*" or "aria-*" - case insensitive *) 262 262 let matches_pattern attr pattern = 263 - let attr_lower = String.lowercase_ascii attr in 264 - let pattern_lower = String.lowercase_ascii pattern in 263 + let attr_lower = Astring.String.Ascii.lowercase attr in 264 + let pattern_lower = Astring.String.Ascii.lowercase pattern in 265 265 if String.ends_with ~suffix:"-*" pattern_lower then 266 266 let prefix = String.sub pattern_lower 0 (String.length pattern_lower - 1) in 267 267 String.starts_with ~prefix attr_lower ··· 361 361 state.in_svg <- true; 362 362 363 363 if is_svg_element || state.in_svg then begin 364 - let name_lower = String.lowercase_ascii name in 364 + let name_lower = Astring.String.Ascii.lowercase name in 365 365 366 366 (* Check SVG content model rules *) 367 367 (* 1. Check if child is allowed in SVG <a> *) 368 368 (match state.element_stack with 369 - | parent :: _ when String.lowercase_ascii parent = "a" -> 369 + | parent :: _ when Astring.String.Ascii.lowercase parent = "a" -> 370 370 if List.mem name_lower a_disallowed_children then 371 371 Message_collector.add_typed collector 372 372 (`Element (`Not_allowed_as_child (`Child name, `Parent "a"))) ··· 382 382 (* 2.5 Check stop element is only in linearGradient or radialGradient *) 383 383 if name_lower = "stop" then begin 384 384 match state.element_stack with 385 - | parent :: _ when (let p = String.lowercase_ascii parent in 385 + | parent :: _ when (let p = Astring.String.Ascii.lowercase parent in 386 386 p = "lineargradient" || p = "radialgradient") -> () 387 387 | parent :: _ -> 388 388 Message_collector.add_typed collector ··· 393 393 (* 2.6 Check use element is not nested inside another use element *) 394 394 if name_lower = "use" then begin 395 395 match state.element_stack with 396 - | parent :: _ when String.lowercase_ascii parent = "use" -> 396 + | parent :: _ when Astring.String.Ascii.lowercase parent = "use" -> 397 397 Message_collector.add_typed collector 398 398 (`Element (`Not_allowed_as_child (`Child name, `Parent parent))) 399 399 | _ -> () ··· 401 401 402 402 (* 3. Check duplicate feFunc* in feComponentTransfer *) 403 403 (match state.element_stack with 404 - | parent :: _ when String.lowercase_ascii parent = "fecomponenttransfer" -> 404 + | parent :: _ when Astring.String.Ascii.lowercase parent = "fecomponenttransfer" -> 405 405 if List.mem name_lower ["fefuncr"; "fefuncg"; "fefuncb"; "fefunca"] then begin 406 406 match state.fecomponenttransfer_stack with 407 407 | fect :: _ -> ··· 435 435 436 436 (* Check each attribute *) 437 437 List.iter (fun (attr, value) -> 438 - let attr_lower = String.lowercase_ascii attr in 438 + let attr_lower = Astring.String.Ascii.lowercase attr in 439 439 440 440 (* Validate xmlns attributes *) 441 441 if String.starts_with ~prefix:"xmlns" attr_lower then ··· 457 457 (match List.assoc_opt name_lower required_attrs with 458 458 | Some req_attrs -> 459 459 List.iter (fun req_attr -> 460 - if not (List.exists (fun (a, _) -> String.lowercase_ascii a = req_attr) attrs) then 460 + if not (Attr_utils.has_attr req_attr attrs) then 461 461 Message_collector.add_typed collector 462 462 (`Svg (`Missing_attr (`Elem name_lower, `Attr req_attr))) 463 463 ) req_attrs ··· 469 469 let name = Tag.tag_to_string tag in 470 470 471 471 if is_svg_element || state.in_svg then begin 472 - let name_lower = String.lowercase_ascii name in 472 + let name_lower = Astring.String.Ascii.lowercase name in 473 473 474 474 (* Check required children when closing font element *) 475 475 if name_lower = "font" then begin
+5 -5
lib/check/specialized/table_checker.ml
··· 354 354 355 355 (** Parse a non-negative integer attribute, returning 1 if absent or invalid *) 356 356 let parse_non_negative_int attrs name = 357 - match List.assoc_opt name attrs with 357 + match Attr_utils.get_attr name attrs with 358 358 | None -> 1 359 359 | Some v -> ( 360 360 try ··· 364 364 365 365 (** Parse a positive integer attribute, returning 1 if absent or invalid *) 366 366 let parse_positive_int attrs name = 367 - match List.assoc_opt name attrs with 367 + match Attr_utils.get_attr name attrs with 368 368 | None -> 1 369 369 | Some v -> ( 370 370 try ··· 374 374 375 375 (** Parse the headers attribute into a list of IDs *) 376 376 let parse_headers attrs = 377 - match List.assoc_opt "headers" attrs with 377 + match Attr_utils.get_attr "headers" attrs with 378 378 | None -> [] 379 379 | Some v -> 380 380 let parts = String.split_on_char ' ' v in ··· 523 523 table.state <- InCellInRowGroup; 524 524 (* Record header ID if present *) 525 525 if is_header then ( 526 - match List.assoc_opt "id" attrs with 526 + match Attr_utils.get_attr "id" attrs with 527 527 | Some id when String.length id > 0 -> Hashtbl.replace table.header_ids id () 528 528 | _ -> ()); 529 529 (* Parse cell attributes *) ··· 541 541 table.state <- InCellInImplicitRowGroup; 542 542 (* Same logic as above *) 543 543 if is_header then ( 544 - match List.assoc_opt "id" attrs with 544 + match Attr_utils.get_attr "id" attrs with 545 545 | Some id when String.length id > 0 -> Hashtbl.replace table.header_ids id () 546 546 | _ -> ()); 547 547 let colspan = abs (parse_positive_int attrs "colspan") in
+10 -4
lib/check/specialized/title_checker.ml
··· 1 1 (** Title element validation checker. *) 2 2 3 3 type state = { 4 + mutable seen_html : bool; (* true if we've seen html element (full document mode) *) 4 5 mutable in_head : bool; 5 6 mutable head_had_children : bool; (* true if head contained any child elements *) 6 7 mutable has_title : bool; ··· 10 11 } 11 12 12 13 let create () = { 14 + seen_html = false; 13 15 in_head = false; 14 16 head_had_children = false; 15 17 has_title = false; ··· 19 21 } 20 22 21 23 let reset state = 24 + state.seen_html <- false; 22 25 state.in_head <- false; 23 26 state.head_had_children <- false; 24 27 state.has_title <- false; ··· 28 31 29 32 let start_element state ~element _collector = 30 33 (match element.Element.tag with 31 - | Tag.Html `Html -> () 34 + | Tag.Html `Html -> 35 + state.seen_html <- true 32 36 | Tag.Html `Head -> 33 37 state.in_head <- true; 34 38 state.head_had_children <- false ··· 55 59 (`Element (`Must_not_be_empty (`Elem "title"))); 56 60 state.in_title <- false 57 61 | Tag.Html `Head -> 58 - (* Only report missing title if head had children (was explicit with content). 59 - An empty head was likely implicit (fragment validation from body). *) 60 - if state.in_head && not state.has_title && state.head_had_children then 62 + (* Report missing title if: 63 + - We saw an html element (full document mode), OR 64 + - Head had explicit children (was not just an implicit empty head) 65 + An empty head without html element was likely implicit (fragment validation). *) 66 + if state.in_head && not state.has_title && (state.seen_html || state.head_had_children) then 61 67 Message_collector.add_typed collector 62 68 (`Element (`Missing_child (`Parent "head", `Child "title"))); 63 69 state.in_head <- false
+1 -1
lib/check/specialized/unknown_element_checker.ml
··· 31 31 state.stack <- name :: state.stack 32 32 33 33 | Tag.Html tag -> 34 - let name_lower = String.lowercase_ascii (Tag.tag_to_string (Tag.Html tag)) in 34 + let name_lower = Astring.String.Ascii.lowercase (Tag.tag_to_string (Tag.Html tag)) in 35 35 state.stack <- name_lower :: state.stack 36 36 37 37 | _ -> () (* SVG, MathML, Custom elements are allowed *)
+24 -25
lib/check/specialized/url_checker.ml
··· 67 67 68 68 (** Check if pipe is allowed in this host context. *) 69 69 let is_pipe_allowed_in_host url host = 70 - let scheme = try String.lowercase_ascii (String.sub url 0 (String.index url ':')) with _ -> "" in 70 + let scheme = try Astring.String.Ascii.lowercase (String.sub url 0 (String.index url ':')) with _ -> "" in 71 71 scheme = "file" && is_valid_windows_drive host 72 72 73 73 (** Special schemes that require double slash (//). ··· 95 95 (c >= '0' && c <= '9') || c = '+' || c = '-' || c = '.' 96 96 ) potential_scheme in 97 97 if is_valid_scheme then 98 - Some (String.lowercase_ascii potential_scheme) 98 + Some (Astring.String.Ascii.lowercase potential_scheme) 99 99 else 100 100 None 101 101 with Not_found -> None ··· 104 104 let extract_host_and_port url = 105 105 try 106 106 let double_slash = 107 - try Some (Str.search_forward (Str.regexp "://") url 0 + 3) 108 - with Not_found -> None 107 + match Astring.String.find_sub ~sub:"://" url with 108 + | Some pos -> Some (pos + 3) 109 + | None -> None 109 110 in 110 111 match double_slash with 111 112 | None -> (None, None) ··· 250 251 (* Check for ASCII percent *) 251 252 String.contains s '%' || 252 253 (* Check for fullwidth percent (U+FF05 = 0xEF 0xBC 0x85 in UTF-8) *) 253 - try 254 - let _ = Str.search_forward (Str.regexp "\xef\xbc\x85") s 0 in 255 - true 256 - with Not_found -> false 254 + Astring.String.is_infix ~affix:"\xef\xbc\x85" s 257 255 258 256 (** Check if decoded host contains forbidden characters. 259 257 Some URLs have percent-encoded fullwidth characters that decode to forbidden chars. *) ··· 424 422 let check_path_segment url attr_name element_name = 425 423 (* Extract path: everything after authority (or after scheme: for non-authority URLs) *) 426 424 let raw_path = 427 - try 428 - let double_slash = Str.search_forward (Str.regexp "://") url 0 in 425 + match Astring.String.find_sub ~sub:"://" url with 426 + | Some double_slash -> 429 427 let after_auth_start = double_slash + 3 in 430 428 let rest = String.sub url after_auth_start (String.length url - after_auth_start) in 431 429 (* Find end of authority *) ··· 437 435 String.sub rest path_start (String.length rest - path_start) 438 436 else 439 437 "" 440 - with Not_found -> 438 + | None -> 441 439 (* No double slash - check for single slash path *) 442 - match extract_scheme url with 440 + (match extract_scheme url with 443 441 | Some _ -> 444 - let colon_pos = String.index url ':' in 445 - let after_colon = String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) in 446 - after_colon 442 + (try 443 + let colon_pos = String.index url ':' in 444 + String.sub url (colon_pos + 1) (String.length url - colon_pos - 1) 445 + with Not_found -> url) 447 446 | None -> 448 447 (* Relative URL - the whole thing is the path *) 449 - url 448 + url) 450 449 in 451 450 (* Remove query and fragment for path-specific checks *) 452 451 let path = remove_query_fragment raw_path in ··· 546 545 547 546 (** Check for illegal characters in userinfo (user:password). *) 548 547 let check_userinfo url attr_name element_name = 548 + match Astring.String.find_sub ~sub:"://" url with 549 + | None -> None 550 + | Some pos -> 549 551 try 550 552 (* Look for :// then find the LAST @ before the next / or end *) 551 - let double_slash = Str.search_forward (Str.regexp "://") url 0 + 3 in 553 + let double_slash = pos + 3 in 552 554 let rest = String.sub url double_slash (String.length url - double_slash) in 553 555 (* Find first / or ? or # to limit authority section *) 554 556 let auth_end = ··· 633 635 let url = String.trim url in 634 636 (* Empty URL check for certain attributes *) 635 637 if url = "" then begin 636 - let name_lower = String.lowercase_ascii element_name in 637 - let attr_lower = String.lowercase_ascii attr_name in 638 + let name_lower = Astring.String.Ascii.lowercase element_name in 639 + let attr_lower = Astring.String.Ascii.lowercase attr_name in 638 640 if List.mem attr_lower must_be_non_empty || 639 641 List.mem (name_lower, attr_lower) must_be_non_empty_combinations then 640 642 Some (Printf.sprintf "Bad value %s for attribute %s on element %s: Bad URL: Must be non-empty." ··· 739 741 let reset _state = () 740 742 741 743 (** Get attribute value by name. *) 742 - let get_attr_value name attrs = 743 - List.find_map (fun (k, v) -> 744 - if String.lowercase_ascii k = String.lowercase_ascii name then Some v else None 745 - ) attrs 744 + let get_attr_value = Attr_utils.get_attr 746 745 747 746 let start_element _state ~element collector = 748 747 match element.Element.tag with 749 748 | Tag.Html _ -> 750 749 let name = Tag.tag_to_string element.tag in 751 - let name_lower = String.lowercase_ascii name in 750 + let name_lower = Astring.String.Ascii.lowercase name in 752 751 let attrs = element.raw_attrs in 753 752 (* Check URL attributes for elements that have them *) 754 753 (match List.assoc_opt name_lower url_attributes with ··· 794 793 match validate_url url name "value" with 795 794 | None -> () 796 795 | Some error_msg -> 797 - let error_msg = Str.global_replace (Str.regexp "Bad URL:") "Bad absolute URL:" error_msg in 796 + let error_msg = Astring.String.concat ~sep:"Bad absolute URL:" (Astring.String.cuts ~sep:"Bad URL:" error_msg) in 798 797 Message_collector.add_typed collector (`Attr (`Bad_value_generic (`Message error_msg))) 799 798 end 800 799 end
+5 -5
lib/check/specialized/xhtml_content_checker.ml
··· 54 54 55 55 let start_element state ~element collector = 56 56 let name = Tag.tag_to_string element.Element.tag in 57 - let name_lower = String.lowercase_ascii name in 57 + let name_lower = Astring.String.Ascii.lowercase name in 58 58 let attrs = element.raw_attrs in 59 59 60 60 (* Check data-* attributes for uppercase *) ··· 63 63 (* Check if this element is allowed as child of parent *) 64 64 (match state.element_stack with 65 65 | parent :: _ -> 66 - let parent_lower = String.lowercase_ascii parent in 66 + let parent_lower = Astring.String.Ascii.lowercase parent in 67 67 if not (is_child_allowed ~parent:parent_lower ~child:name_lower) then 68 68 Message_collector.add_typed collector 69 69 (`Element (`Not_allowed_as_child (`Child name_lower, `Parent parent_lower))) ··· 71 71 72 72 (* Handle figure content model *) 73 73 (match state.element_stack with 74 - | parent :: _ when String.lowercase_ascii parent = "figure" -> 74 + | parent :: _ when Astring.String.Ascii.lowercase parent = "figure" -> 75 75 (* We're inside a figure, check content model *) 76 76 (match state.figure_stack with 77 77 | fig :: _ -> ··· 99 99 state.element_stack <- name :: state.element_stack 100 100 101 101 let end_element state ~tag _collector = 102 - let name_lower = String.lowercase_ascii (Tag.tag_to_string tag) in 102 + let name_lower = Astring.String.Ascii.lowercase (Tag.tag_to_string tag) in 103 103 (* Pop figure state if leaving a figure *) 104 104 if name_lower = "figure" then begin 105 105 match state.figure_stack with ··· 115 115 match state.element_stack with 116 116 | [] -> () 117 117 | parent :: _ -> 118 - let parent_lower = String.lowercase_ascii parent in 118 + let parent_lower = Astring.String.Ascii.lowercase parent in 119 119 let trimmed = String.trim text in 120 120 if trimmed <> "" then begin 121 121 if parent_lower = "figure" then begin
+4 -4
lib/html5rw/parser/parser.mli
··· 359 359 (** Result of parsing an HTML document or fragment. 360 360 361 361 This opaque type contains: 362 - - The DOM tree (access via {!root}) 363 - - Parse errors if collection was enabled (access via {!errors}) 364 - - Detected encoding for byte input (access via {!encoding}) 362 + - The DOM tree (access via {!val:root}) 363 + - Parse errors if collection was enabled (access via {!val:errors}) 364 + - Detected encoding for byte input (access via {!val:encoding}) 365 365 *) 366 366 type t 367 367 ··· 416 416 3. {b Transport hint}: Use [transport_encoding] if provided 417 417 4. {b Fallback}: Use UTF-8 418 418 419 - The detected encoding is stored in the result (access via {!encoding}). 419 + The detected encoding is stored in the result (access via {!val:encoding}). 420 420 421 421 {b Prescan details:} 422 422
+38
lib/js/dune
··· 37 37 (modes js wasm) 38 38 (modules htmlrw_js_worker)) 39 39 40 + ; Test runner for browser-based regression testing 41 + ; Runs html5lib conformance tests in the browser 42 + (executable 43 + (name htmlrw_js_tests_main) 44 + (libraries html5rw bytesrw brr) 45 + (js_of_ocaml 46 + (javascript_files)) 47 + (modes js wasm) 48 + (modules htmlrw_js_tests htmlrw_js_tests_main)) 49 + 40 50 ; Copy to nice filenames (JS) 41 51 (rule 42 52 (targets htmlrw.js) ··· 48 58 (deps htmlrw_js_worker.bc.js) 49 59 (action (copy %{deps} %{targets}))) 50 60 61 + (rule 62 + (targets htmlrw-tests.js) 63 + (deps htmlrw_js_tests_main.bc.js) 64 + (action (copy %{deps} %{targets}))) 65 + 51 66 ; Copy to nice filenames (WASM) 52 67 ; Note: requires wasm_of_ocaml-compiler to be installed 53 68 (rule ··· 59 74 (targets htmlrw-worker.wasm.js) 60 75 (deps htmlrw_js_worker.bc.wasm.js) 61 76 (action (copy %{deps} %{targets}))) 77 + 78 + (rule 79 + (targets htmlrw-tests.wasm.js) 80 + (deps htmlrw_js_tests_main.bc.wasm.js) 81 + (action (copy %{deps} %{targets}))) 82 + 83 + ; Install web assets to share/html5rw-js/ for npm packaging 84 + (install 85 + (package html5rw-js) 86 + (section share) 87 + (files 88 + ; JavaScript bundles 89 + htmlrw.js 90 + htmlrw-worker.js 91 + htmlrw-tests.js 92 + ; WASM loader scripts 93 + htmlrw.wasm.js 94 + htmlrw-worker.wasm.js 95 + htmlrw-tests.wasm.js 96 + ; WASM assets (with content-hashed filenames) 97 + (glob_files_rec (htmlrw_js_main.bc.wasm.assets/* with_prefix htmlrw_js_main.bc.wasm.assets)) 98 + (glob_files_rec (htmlrw_js_worker.bc.wasm.assets/* with_prefix htmlrw_js_worker.bc.wasm.assets)) 99 + (glob_files_rec (htmlrw_js_tests_main.bc.wasm.assets/* with_prefix htmlrw_js_tests_main.bc.wasm.assets))))
+3 -2
lib/js/htmlrw_js.mli
··· 47 47 (** Validate an HTML string. 48 48 49 49 This is the simplest form of validation. Since there's no source element, 50 - the returned {!browser_message}s will not have element references. 50 + the returned messages will not have element references. 51 51 52 52 {[ 53 53 let result = validate_string "<html><body><img></body></html>" in ··· 83 83 descendants are annotated with data attributes, classes, and optionally 84 84 tooltips based on the validation results. 85 85 86 - @param config Annotation configuration. Defaults to {!default_annotation_config}. *) 86 + @param config Annotation configuration. Defaults to 87 + [Htmlrw_js_types.default_annotation_config]. *) 87 88 val validate_and_annotate : 88 89 ?config:annotation_config -> Brr.El.t -> result 89 90
+407
lib/js/htmlrw_js_tests.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Browser-compatible test runner for html5rw regression tests. 7 + 8 + This module provides functions to run html5lib conformance tests 9 + in the browser, receiving test data as strings and returning results 10 + as JavaScript-accessible objects. *) 11 + 12 + [@@@warning "-69-33"] (* Silence unused-field and unused-open warnings *) 13 + 14 + open Brr 15 + 16 + (* ============================================================ *) 17 + (* Test Result Types *) 18 + (* ============================================================ *) 19 + 20 + type test_result = { 21 + test_num : int; 22 + description : string; 23 + input : string; 24 + expected : string; 25 + actual : string; 26 + success : bool; 27 + } 28 + 29 + type file_result = { 30 + filename : string; 31 + test_type : string; 32 + passed_count : int; 33 + failed_count : int; 34 + tests : test_result list; 35 + } 36 + 37 + type suite_result = { 38 + name : string; 39 + total_passed : int; 40 + total_failed : int; 41 + files : file_result list; 42 + } 43 + 44 + (* ============================================================ *) 45 + (* Tree Construction Tests *) 46 + (* ============================================================ *) 47 + 48 + module TreeConstruction = struct 49 + type test_case = { 50 + input : string; 51 + expected_tree : string; 52 + expected_errors : string list; 53 + script_on : bool; 54 + fragment_context : string option; 55 + } 56 + 57 + let parse_test_case lines = 58 + let rec parse acc = function 59 + | [] -> acc 60 + | line :: rest when String.length line > 0 && line.[0] = '#' -> 61 + let section = String.trim line in 62 + let content, remaining = collect_section rest in 63 + parse ((section, content) :: acc) remaining 64 + | _ :: rest -> parse acc rest 65 + and collect_section lines = 66 + let rec loop acc = function 67 + | [] -> (List.rev acc, []) 68 + | line :: rest when String.length line > 0 && line.[0] = '#' -> 69 + (List.rev acc, line :: rest) 70 + | line :: rest -> loop (line :: acc) rest 71 + in 72 + loop [] lines 73 + in 74 + let sections = parse [] lines in 75 + let get_section name = 76 + match List.assoc_opt name sections with 77 + | Some lines -> String.concat "\n" lines 78 + | None -> "" 79 + in 80 + let data = get_section "#data" in 81 + let document = get_section "#document" in 82 + let errors_text = get_section "#errors" in 83 + let errors = 84 + String.split_on_char '\n' errors_text 85 + |> List.filter (fun s -> String.trim s <> "") 86 + in 87 + let script_on = List.mem_assoc "#script-on" sections in 88 + let fragment = 89 + if List.mem_assoc "#document-fragment" sections then 90 + Some (get_section "#document-fragment" |> String.trim) 91 + else None 92 + in 93 + { input = data; expected_tree = document; expected_errors = errors; 94 + script_on; fragment_context = fragment } 95 + 96 + let parse_dat_content content = 97 + let lines = String.split_on_char '\n' content in 98 + let rec split_tests current acc = function 99 + | [] -> 100 + if current = [] then List.rev acc 101 + else List.rev (List.rev current :: acc) 102 + | "" :: "#data" :: rest -> 103 + let new_acc = if current = [] then acc else (List.rev current :: acc) in 104 + split_tests ["#data"] new_acc rest 105 + | line :: rest -> 106 + split_tests (line :: current) acc rest 107 + in 108 + let test_groups = split_tests [] [] lines in 109 + List.filter_map (fun lines -> 110 + if List.exists (fun l -> l = "#data") lines then 111 + Some (parse_test_case lines) 112 + else None 113 + ) test_groups 114 + 115 + let strip_tree_prefix s = 116 + let lines = String.split_on_char '\n' s in 117 + let stripped = List.filter_map (fun line -> 118 + if String.length line >= 2 && String.sub line 0 2 = "| " then 119 + Some (String.sub line 2 (String.length line - 2)) 120 + else if String.trim line = "" then None 121 + else Some line 122 + ) lines in 123 + String.concat "\n" stripped 124 + 125 + let normalize_tree s = 126 + let lines = String.split_on_char '\n' s in 127 + let non_empty = List.filter (fun l -> String.trim l <> "") lines in 128 + String.concat "\n" non_empty 129 + 130 + let run_test test = 131 + try 132 + let result = 133 + match test.fragment_context with 134 + | Some ctx_str -> 135 + let (namespace, tag_name) = 136 + match String.split_on_char ' ' ctx_str with 137 + | [ns; tag] when ns = "svg" -> (Some "svg", tag) 138 + | [ns; tag] when ns = "math" -> (Some "mathml", tag) 139 + | [tag] -> (None, tag) 140 + | _ -> (None, ctx_str) 141 + in 142 + let context = Html5rw.Parser.make_fragment_context ~tag_name ~namespace () in 143 + let reader = Bytesrw.Bytes.Reader.of_string test.input in 144 + Html5rw.Parser.parse ~collect_errors:true ~fragment_context:context reader 145 + | None -> 146 + let reader = Bytesrw.Bytes.Reader.of_string test.input in 147 + Html5rw.Parser.parse ~collect_errors:true reader 148 + in 149 + let actual_tree = Html5rw.Dom.to_test_format (Html5rw.Parser.root result) in 150 + let expected = normalize_tree (strip_tree_prefix test.expected_tree) in 151 + let actual = normalize_tree (strip_tree_prefix actual_tree) in 152 + (expected = actual, expected, actual) 153 + with e -> 154 + let expected = normalize_tree (strip_tree_prefix test.expected_tree) in 155 + (false, expected, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e)) 156 + 157 + let run_content ~filename content = 158 + let tests = parse_dat_content content in 159 + let passed = ref 0 in 160 + let failed = ref 0 in 161 + let results = ref [] in 162 + List.iteri (fun i test -> 163 + if test.script_on then () 164 + else begin 165 + let (success, expected, actual) = run_test test in 166 + let description = 167 + let input_preview = String.sub test.input 0 (min 60 (String.length test.input)) in 168 + if test.fragment_context <> None then 169 + Printf.sprintf "Fragment (%s): %s" (Option.get test.fragment_context) input_preview 170 + else input_preview 171 + in 172 + let result = { 173 + test_num = i + 1; description; input = test.input; expected; actual; success; 174 + } in 175 + results := result :: !results; 176 + if success then incr passed else incr failed 177 + end 178 + ) tests; 179 + { 180 + filename; test_type = "Tree Construction"; 181 + passed_count = !passed; failed_count = !failed; 182 + tests = List.rev !results; 183 + } 184 + end 185 + 186 + (* ============================================================ *) 187 + (* Encoding Tests *) 188 + (* ============================================================ *) 189 + 190 + module EncodingTests = struct 191 + type test_case = { 192 + input : string; 193 + expected_encoding : string; 194 + } 195 + 196 + let normalize_encoding_name s = String.lowercase_ascii (String.trim s) 197 + 198 + let encoding_to_test_name = function 199 + | Html5rw.Encoding.Utf8 -> "utf-8" 200 + | Html5rw.Encoding.Utf16le -> "utf-16le" 201 + | Html5rw.Encoding.Utf16be -> "utf-16be" 202 + | Html5rw.Encoding.Windows_1252 -> "windows-1252" 203 + | Html5rw.Encoding.Iso_8859_2 -> "iso-8859-2" 204 + | Html5rw.Encoding.Euc_jp -> "euc-jp" 205 + 206 + let parse_test_case lines = 207 + let rec parse acc = function 208 + | [] -> acc 209 + | line :: rest when String.length line > 0 && line.[0] = '#' -> 210 + let section = String.trim line in 211 + let content, remaining = collect_section rest in 212 + parse ((section, content) :: acc) remaining 213 + | _ :: rest -> parse acc rest 214 + and collect_section lines = 215 + let rec loop acc = function 216 + | [] -> (List.rev acc, []) 217 + | line :: rest when String.length line > 0 && line.[0] = '#' -> 218 + (List.rev acc, line :: rest) 219 + | line :: rest -> loop (line :: acc) rest 220 + in loop [] lines 221 + in 222 + let sections = parse [] lines in 223 + let get_section name = 224 + match List.assoc_opt name sections with 225 + | Some lines -> String.concat "\n" lines | None -> "" 226 + in 227 + let data = get_section "#data" in 228 + let encoding = get_section "#encoding" in 229 + { input = data; expected_encoding = String.trim encoding } 230 + 231 + let parse_dat_content content = 232 + let lines = String.split_on_char '\n' content in 233 + let rec split_tests current acc = function 234 + | [] -> if current = [] then List.rev acc else List.rev (List.rev current :: acc) 235 + | "" :: "#data" :: rest -> 236 + let new_acc = if current = [] then acc else (List.rev current :: acc) in 237 + split_tests ["#data"] new_acc rest 238 + | line :: rest -> split_tests (line :: current) acc rest 239 + in 240 + let test_groups = split_tests [] [] lines in 241 + List.filter_map (fun lines -> 242 + if List.exists (fun l -> l = "#data") lines then Some (parse_test_case lines) 243 + else None 244 + ) test_groups 245 + 246 + let run_test test = 247 + try 248 + let (_, detected_encoding) = Html5rw.Encoding.decode (Bytes.of_string test.input) () in 249 + let detected_name = encoding_to_test_name detected_encoding in 250 + let expected_name = normalize_encoding_name test.expected_encoding in 251 + let match_encoding det exp = 252 + det = exp || 253 + (det = "windows-1252" && (exp = "windows-1252" || exp = "cp1252" || exp = "iso-8859-1")) || 254 + (det = "iso-8859-2" && (exp = "iso-8859-2" || exp = "iso8859-2" || exp = "latin2")) || 255 + (det = "utf-8" && (exp = "utf-8" || exp = "utf8")) || 256 + (det = "euc-jp" && (exp = "euc-jp" || exp = "eucjp")) 257 + in 258 + (match_encoding detected_name expected_name, detected_name, expected_name) 259 + with e -> 260 + (false, Printf.sprintf "EXCEPTION: %s" (Printexc.to_string e), test.expected_encoding) 261 + 262 + let run_content ~filename content = 263 + let tests = parse_dat_content content in 264 + let passed = ref 0 in 265 + let failed = ref 0 in 266 + let results = ref [] in 267 + List.iteri (fun i test -> 268 + if String.trim test.expected_encoding = "" then () 269 + else begin 270 + let (success, detected, expected) = run_test test in 271 + let result = { 272 + test_num = i + 1; 273 + description = Printf.sprintf "Detect %s encoding" expected; 274 + input = String.escaped test.input; 275 + expected; actual = detected; success; 276 + } in 277 + results := result :: !results; 278 + if success then incr passed else incr failed 279 + end 280 + ) tests; 281 + { 282 + filename; test_type = "Encoding Detection"; 283 + passed_count = !passed; failed_count = !failed; 284 + tests = List.rev !results; 285 + } 286 + end 287 + 288 + (* ============================================================ *) 289 + (* JavaScript API *) 290 + (* ============================================================ *) 291 + 292 + let test_result_to_jv (r : test_result) = 293 + Jv.obj [| 294 + "testNum", Jv.of_int r.test_num; 295 + "description", Jv.of_string r.description; 296 + "input", Jv.of_string r.input; 297 + "expected", Jv.of_string r.expected; 298 + "actual", Jv.of_string r.actual; 299 + "success", Jv.of_bool r.success; 300 + |] 301 + 302 + let file_result_to_jv (r : file_result) = 303 + Jv.obj [| 304 + "filename", Jv.of_string r.filename; 305 + "testType", Jv.of_string r.test_type; 306 + "passedCount", Jv.of_int r.passed_count; 307 + "failedCount", Jv.of_int r.failed_count; 308 + "tests", Jv.of_list test_result_to_jv r.tests; 309 + |] 310 + 311 + let suite_result_to_jv (r : suite_result) = 312 + Jv.obj [| 313 + "name", Jv.of_string r.name; 314 + "totalPassed", Jv.of_int r.total_passed; 315 + "totalFailed", Jv.of_int r.total_failed; 316 + "files", Jv.of_list file_result_to_jv r.files; 317 + |] 318 + 319 + (** Run tree construction tests on a single file's content *) 320 + let run_tree_construction_test filename content = 321 + let result = TreeConstruction.run_content ~filename content in 322 + file_result_to_jv result 323 + 324 + (** Run encoding detection tests on a single file's content *) 325 + let run_encoding_test filename content = 326 + let result = EncodingTests.run_content ~filename content in 327 + file_result_to_jv result 328 + 329 + (** Run all tests from provided test data *) 330 + let run_all_tests (test_files : (string * string * string) list) = 331 + let tree_files = ref [] in 332 + let encoding_files = ref [] in 333 + let total_passed = ref 0 in 334 + let total_failed = ref 0 in 335 + 336 + List.iter (fun (test_type, filename, content) -> 337 + let result = match test_type with 338 + | "tree-construction" -> 339 + let r = TreeConstruction.run_content ~filename content in 340 + tree_files := r :: !tree_files; 341 + r 342 + | "encoding" -> 343 + let r = EncodingTests.run_content ~filename content in 344 + encoding_files := r :: !encoding_files; 345 + r 346 + | _ -> failwith ("Unknown test type: " ^ test_type) 347 + in 348 + total_passed := !total_passed + result.passed_count; 349 + total_failed := !total_failed + result.failed_count 350 + ) test_files; 351 + 352 + let all_files = List.rev !tree_files @ List.rev !encoding_files in 353 + let suite = { 354 + name = "HTML5lib Regression Tests"; 355 + total_passed = !total_passed; 356 + total_failed = !total_failed; 357 + files = all_files; 358 + } in 359 + suite_result_to_jv suite 360 + 361 + (* ============================================================ *) 362 + (* Simple Parser Test for Quick Validation *) 363 + (* ============================================================ *) 364 + 365 + let quick_parse_test html = 366 + try 367 + let reader = Bytesrw.Bytes.Reader.of_string html in 368 + let result = Html5rw.Parser.parse ~collect_errors:true reader in 369 + let root = Html5rw.Parser.root result in 370 + let serialized = Html5rw.Dom.to_html root in 371 + let errors = Html5rw.Parser.errors result in 372 + let error_to_string e = Format.asprintf "%a" Html5rw.pp_parse_error e in 373 + Jv.obj [| 374 + "success", Jv.of_bool true; 375 + "html", Jv.of_string serialized; 376 + "errorCount", Jv.of_int (List.length errors); 377 + "errors", Jv.of_list (fun e -> Jv.of_string (error_to_string e)) errors; 378 + |] 379 + with e -> 380 + Jv.obj [| 381 + "success", Jv.of_bool false; 382 + "error", Jv.of_string (Printexc.to_string e); 383 + |] 384 + 385 + (* ============================================================ *) 386 + (* Export to JavaScript *) 387 + (* ============================================================ *) 388 + 389 + let () = 390 + let html5rw_tests = Jv.obj [| 391 + "runTreeConstructionTest", Jv.callback ~arity:2 (fun filename content -> 392 + run_tree_construction_test (Jv.to_string filename) (Jv.to_string content)); 393 + "runEncodingTest", Jv.callback ~arity:2 (fun filename content -> 394 + run_encoding_test (Jv.to_string filename) (Jv.to_string content)); 395 + "runAllTests", Jv.callback ~arity:1 (fun files_jv -> 396 + let files = Jv.to_list (fun item -> 397 + let test_type = Jv.to_string (Jv.get item "type") in 398 + let filename = Jv.to_string (Jv.get item "filename") in 399 + let content = Jv.to_string (Jv.get item "content") in 400 + (test_type, filename, content) 401 + ) files_jv in 402 + run_all_tests files); 403 + "quickParseTest", Jv.callback ~arity:1 (fun html -> 404 + quick_parse_test (Jv.to_string html)); 405 + "version", Jv.of_string "1.0.0"; 406 + |] in 407 + Jv.set Jv.global "html5rwTests" html5rw_tests
+25
lib/js/htmlrw_js_tests.mli
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Browser-compatible test runner for html5rw regression tests. 7 + 8 + This module provides functions to run html5lib conformance tests 9 + in the browser. Test data is provided as strings and results are 10 + returned as JavaScript-accessible objects. 11 + 12 + {2 JavaScript API} 13 + 14 + The following functions are exposed to JavaScript via the global 15 + [html5rwTests] object: 16 + 17 + - [html5rwTests.runTreeConstructionTest(filename, content)] - Run tree 18 + construction tests from a .dat file content 19 + - [html5rwTests.runEncodingTest(filename, content)] - Run encoding 20 + detection tests from a .dat file content 21 + - [html5rwTests.runAllTests(files)] - Run all tests from an array of 22 + file objects with {type, filename, content} 23 + - [html5rwTests.quickParseTest(html)] - Quick parse test for a single 24 + HTML string 25 + - [html5rwTests.version] - Version string *)
+10
lib/js/htmlrw_js_tests_main.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Entry point for the browser test runner. 7 + 8 + The test runner module registers its JavaScript exports when loaded. *) 9 + 10 + (* Nothing needed here - the Htmlrw_js_tests module registers exports at load time *)
+6
lib/js/htmlrw_js_tests_main.mli
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: ISC 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Entry point for the browser test runner. *)
+46 -13
lib/js/htmlrw_js_ui.ml
··· 6 6 open Brr 7 7 open Htmlrw_js_types 8 8 9 + let console_log msg = 10 + ignore (Jv.call (Jv.get Jv.global "console") "log" [| Jv.of_string ("[html5rw-ui] " ^ msg) |]) 11 + 9 12 module Css_class = struct 10 13 let panel = Jstr.v "html5rw-panel" 11 14 let panel_header = Jstr.v "html5rw-panel-header" ··· 71 74 let highlighted_element t = t.highlighted 72 75 73 76 let clear_highlight t = 77 + console_log (Printf.sprintf "clear_highlight: highlighted is %s" 78 + (if t.highlighted = None then "None" else "Some")); 74 79 match t.highlighted with 75 80 | Some el -> 81 + console_log "clear_highlight: unhighlighting element"; 76 82 Htmlrw_js_annotate.unhighlight_element el; 77 - t.highlighted <- None 78 - | None -> () 83 + t.highlighted <- None; 84 + console_log "clear_highlight: done" 85 + | None -> 86 + console_log "clear_highlight: nothing to clear" 79 87 80 88 let navigate_to_element t bm = 81 89 clear_highlight t; ··· 190 198 El.set_inline_style (Jstr.v "display") (Jstr.v "none") t.root 191 199 192 200 let destroy t = 201 + console_log "destroy: starting"; 202 + clear_highlight t; 203 + console_log "destroy: cleared highlight"; 204 + (* Clear _current_panel before removing element to avoid comparison issues *) 205 + (match !_current_panel with 206 + | Some p when p.root == t.root -> _current_panel := None 207 + | _ -> ()); 208 + console_log "destroy: cleared current_panel ref"; 193 209 El.remove t.root; 194 - if !_current_panel = Some t then _current_panel := None 210 + console_log "destroy: removed root element, done" 195 211 196 212 let hide_current () = 197 - match !_current_panel with Some t -> destroy t | None -> () 213 + console_log (Printf.sprintf "hide_current: current_panel is %s" 214 + (if !_current_panel = None then "None" else "Some")); 215 + match !_current_panel with 216 + | Some t -> 217 + console_log "hide_current: destroying existing panel"; 218 + destroy t 219 + | None -> 220 + console_log "hide_current: no panel to destroy" 198 221 199 222 let create ~config result = 223 + console_log (Printf.sprintf "create: starting with %d messages" (List.length result.messages)); 200 224 hide_current (); 225 + console_log "create: hide_current done"; 201 226 202 227 let _doc = G.document in 203 228 204 229 let title = El.v (Jstr.v "span") [El.txt' "HTML5 Validation"] in 205 230 206 - let collapse_btn = El.v (Jstr.v "button") ~at:[At.class' Css_class.collapse_btn] [ 207 - El.txt' "_" 208 - ] in 209 - 210 231 let close_btn = El.v (Jstr.v "button") ~at:[At.class' Css_class.close_btn] [ 211 232 El.txt' "x" 212 233 ] in 213 234 214 235 let header = El.v (Jstr.v "div") ~at:[At.class' Css_class.panel_header] [ 215 - title; collapse_btn; close_btn 236 + title; close_btn 216 237 ] in 217 238 218 239 let error_count = List.length (List.filter (fun bm -> ··· 268 289 269 290 update t result; 270 291 271 - ignore (Ev.listen Ev.click (fun _ -> toggle_collapsed t) (El.as_target collapse_btn)); 292 + (* Stop mousedown from bubbling to header (prevents drag interference) *) 293 + ignore (Ev.listen Ev.mousedown (fun ev -> 294 + console_log "close_btn: mousedown, stopping propagation"; 295 + Ev.stop_propagation ev 296 + ) (El.as_target close_btn)); 272 297 273 - ignore (Ev.listen Ev.click (fun _ -> 298 + ignore (Ev.listen Ev.click (fun ev -> 299 + console_log "close_btn: click handler starting"; 300 + Ev.stop_propagation ev; 301 + console_log "close_btn: stopped propagation, calling destroy"; 274 302 destroy t; 275 - match t.on_close with Some f -> f () | None -> () 303 + console_log "close_btn: destroy done, checking on_close callback"; 304 + (match t.on_close with Some f -> f () | None -> ()); 305 + console_log "close_btn: click handler done" 276 306 ) (El.as_target close_btn)); 277 307 278 308 if config.draggable then begin ··· 307 337 if config.start_collapsed then 308 338 El.set_class Css_class.panel_collapsed true root; 309 339 340 + console_log "create: appending panel to document body"; 310 341 El.append_children (Document.body G.document) [root]; 311 342 312 343 _current_panel := Some t; 344 + console_log "create: panel creation complete"; 313 345 t 314 346 315 347 let on_warning_click t f = t.on_warning_click <- Some f ··· 381 413 width: 24px; height: 24px; margin-left: 8px; 382 414 border: none; border-radius: 4px; 383 415 background: transparent; color: var(--html5rw-panel-text); 384 - cursor: pointer; font-size: 14px; line-height: 1; 416 + cursor: pointer; font-size: 14px; 417 + display: flex; align-items: center; justify-content: center; 385 418 } 386 419 387 420 .html5rw-panel-header button:hover { background: rgba(0, 0, 0, 0.1); }
+668
test-regression.html
··· 1 + <!DOCTYPE html> 2 + <html lang="en"> 3 + <head> 4 + <meta charset="UTF-8"> 5 + <meta name="viewport" content="width=device-width, initial-scale=1.0"> 6 + <title>HTML5rw Regression Test Suite</title> 7 + <style> 8 + :root { 9 + --bg-primary: #1a1a2e; 10 + --bg-secondary: #16213e; 11 + --bg-tertiary: #0f3460; 12 + --text-primary: #eee; 13 + --text-secondary: #aaa; 14 + --text-muted: #666; 15 + --accent: #e94560; 16 + --accent-light: #ff6b8a; 17 + --success: #4ade80; 18 + --success-dim: rgba(74, 222, 128, 0.2); 19 + --failure: #f87171; 20 + --failure-dim: rgba(248, 113, 113, 0.2); 21 + --warning: #fbbf24; 22 + --info: #60a5fa; 23 + --border: #333; 24 + --code-bg: #0d1117; 25 + } 26 + 27 + * { box-sizing: border-box; margin: 0; padding: 0; } 28 + 29 + body { 30 + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; 31 + background: var(--bg-primary); 32 + color: var(--text-primary); 33 + line-height: 1.6; 34 + padding: 20px; 35 + } 36 + 37 + .container { max-width: 1400px; margin: 0 auto; } 38 + 39 + .hero { 40 + background: linear-gradient(135deg, var(--bg-secondary) 0%, var(--bg-tertiary) 100%); 41 + padding: 30px 40px; 42 + border-radius: 12px; 43 + margin-bottom: 30px; 44 + border: 1px solid var(--border); 45 + } 46 + 47 + .hero h1 { 48 + font-size: 2rem; 49 + margin-bottom: 10px; 50 + color: var(--accent); 51 + } 52 + 53 + .hero p { color: var(--text-secondary); margin-bottom: 15px; } 54 + 55 + .controls { 56 + display: flex; 57 + gap: 12px; 58 + flex-wrap: wrap; 59 + align-items: center; 60 + } 61 + 62 + button { 63 + padding: 12px 24px; 64 + border: none; 65 + border-radius: 8px; 66 + background: var(--accent); 67 + color: white; 68 + cursor: pointer; 69 + font-size: 14px; 70 + font-weight: 600; 71 + transition: all 0.2s; 72 + } 73 + 74 + button:hover { background: var(--accent-light); transform: translateY(-1px); } 75 + button:disabled { opacity: 0.5; cursor: not-allowed; transform: none; } 76 + button.secondary { background: var(--bg-tertiary); border: 1px solid var(--border); } 77 + button.secondary:hover { background: var(--bg-secondary); } 78 + 79 + select { 80 + padding: 12px 16px; 81 + border: 1px solid var(--border); 82 + border-radius: 8px; 83 + background: var(--bg-secondary); 84 + color: var(--text-primary); 85 + font-size: 14px; 86 + } 87 + 88 + .summary-grid { 89 + display: grid; 90 + grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); 91 + gap: 20px; 92 + margin-bottom: 30px; 93 + } 94 + 95 + .summary-card { 96 + background: var(--bg-secondary); 97 + border-radius: 12px; 98 + padding: 20px; 99 + border: 1px solid var(--border); 100 + text-align: center; 101 + } 102 + 103 + .summary-card h3 { 104 + font-size: 0.8rem; 105 + text-transform: uppercase; 106 + letter-spacing: 1px; 107 + color: var(--text-secondary); 108 + margin-bottom: 8px; 109 + } 110 + 111 + .summary-card .value { 112 + font-size: 2rem; 113 + font-weight: 700; 114 + } 115 + 116 + .summary-card .value.success { color: var(--success); } 117 + .summary-card .value.failure { color: var(--failure); } 118 + .summary-card .value.neutral { color: var(--text-primary); } 119 + 120 + .progress-container { 121 + background: var(--bg-secondary); 122 + border-radius: 12px; 123 + padding: 20px; 124 + margin-bottom: 30px; 125 + border: 1px solid var(--border); 126 + } 127 + 128 + .progress-bar { 129 + height: 24px; 130 + background: var(--failure-dim); 131 + border-radius: 12px; 132 + overflow: hidden; 133 + margin-top: 10px; 134 + } 135 + 136 + .progress-fill { 137 + height: 100%; 138 + background: var(--success); 139 + border-radius: 12px; 140 + transition: width 0.3s ease; 141 + display: flex; 142 + align-items: center; 143 + justify-content: center; 144 + font-size: 12px; 145 + font-weight: 600; 146 + } 147 + 148 + .status-text { 149 + font-size: 14px; 150 + color: var(--text-secondary); 151 + margin-bottom: 8px; 152 + } 153 + 154 + .results-section { 155 + background: var(--bg-secondary); 156 + border-radius: 12px; 157 + margin-bottom: 20px; 158 + border: 1px solid var(--border); 159 + overflow: hidden; 160 + } 161 + 162 + .results-header { 163 + padding: 16px 20px; 164 + background: var(--bg-tertiary); 165 + cursor: pointer; 166 + display: flex; 167 + justify-content: space-between; 168 + align-items: center; 169 + } 170 + 171 + .results-header:hover { background: #1a4a7a; } 172 + 173 + .results-header h2 { 174 + font-size: 1rem; 175 + display: flex; 176 + align-items: center; 177 + gap: 10px; 178 + } 179 + 180 + .results-header .toggle { color: var(--text-secondary); transition: transform 0.2s; } 181 + .results-header.collapsed .toggle { transform: rotate(-90deg); } 182 + 183 + .results-stats { 184 + display: flex; 185 + gap: 15px; 186 + font-size: 14px; 187 + } 188 + 189 + .results-stats .passed { color: var(--success); } 190 + .results-stats .failed { color: var(--failure); } 191 + 192 + .results-content { padding: 15px; } 193 + .results-content.hidden { display: none; } 194 + 195 + .test-item { 196 + margin: 6px 0; 197 + border: 1px solid var(--border); 198 + border-radius: 6px; 199 + overflow: hidden; 200 + } 201 + 202 + .test-header { 203 + padding: 10px 14px; 204 + cursor: pointer; 205 + display: flex; 206 + justify-content: space-between; 207 + align-items: center; 208 + background: var(--bg-primary); 209 + font-size: 13px; 210 + } 211 + 212 + .test-header:hover { background: rgba(255,255,255,0.03); } 213 + 214 + .test-header .status { 215 + width: 8px; 216 + height: 8px; 217 + border-radius: 50%; 218 + margin-right: 10px; 219 + flex-shrink: 0; 220 + } 221 + 222 + .test-header .status.passed { background: var(--success); } 223 + .test-header .status.failed { background: var(--failure); } 224 + 225 + .test-header .test-info { flex: 1; display: flex; align-items: center; min-width: 0; } 226 + .test-header .test-num { font-weight: 600; margin-right: 10px; color: var(--text-muted); } 227 + .test-header .test-desc { white-space: nowrap; overflow: hidden; text-overflow: ellipsis; } 228 + .test-header .expand-icon { color: var(--text-muted); font-size: 0.75rem; } 229 + 230 + .test-details { 231 + padding: 15px; 232 + background: var(--code-bg); 233 + border-top: 1px solid var(--border); 234 + display: none; 235 + font-size: 13px; 236 + } 237 + 238 + .test-details.visible { display: block; } 239 + 240 + .detail-section { margin-bottom: 15px; } 241 + .detail-section:last-child { margin-bottom: 0; } 242 + 243 + .detail-section h4 { 244 + font-size: 11px; 245 + text-transform: uppercase; 246 + letter-spacing: 1px; 247 + color: var(--text-muted); 248 + margin-bottom: 8px; 249 + } 250 + 251 + .detail-section pre { 252 + background: var(--bg-secondary); 253 + padding: 12px; 254 + border-radius: 6px; 255 + overflow-x: auto; 256 + font-family: 'Monaco', 'Menlo', monospace; 257 + font-size: 12px; 258 + white-space: pre-wrap; 259 + word-break: break-word; 260 + max-height: 300px; 261 + overflow-y: auto; 262 + border: 1px solid var(--border); 263 + } 264 + 265 + .detail-row { 266 + display: grid; 267 + grid-template-columns: 1fr 1fr; 268 + gap: 15px; 269 + } 270 + 271 + .filter-controls { 272 + display: flex; 273 + gap: 10px; 274 + margin-bottom: 20px; 275 + flex-wrap: wrap; 276 + } 277 + 278 + .filter-controls input[type="search"] { 279 + padding: 10px 14px; 280 + border: 1px solid var(--border); 281 + border-radius: 8px; 282 + background: var(--bg-secondary); 283 + color: var(--text-primary); 284 + font-size: 14px; 285 + width: 250px; 286 + } 287 + 288 + .log-output { 289 + background: var(--code-bg); 290 + border: 1px solid var(--border); 291 + border-radius: 8px; 292 + padding: 15px; 293 + font-family: 'Monaco', 'Menlo', monospace; 294 + font-size: 12px; 295 + max-height: 200px; 296 + overflow-y: auto; 297 + white-space: pre-wrap; 298 + margin-bottom: 20px; 299 + } 300 + 301 + @media (max-width: 768px) { 302 + .detail-row { grid-template-columns: 1fr; } 303 + .summary-grid { grid-template-columns: 1fr 1fr; } 304 + } 305 + </style> 306 + </head> 307 + <body> 308 + <div class="container"> 309 + <div class="hero"> 310 + <h1>HTML5rw Regression Test Suite</h1> 311 + <p> 312 + Browser-based regression testing for the HTML5rw OCaml parser. 313 + Tests are loaded from the html5lib-tests conformance suite. 314 + </p> 315 + <div class="controls"> 316 + <button id="run-all" onclick="runAllTests()">Run All Tests</button> 317 + <button id="run-tree" class="secondary" onclick="runTreeTests()">Tree Construction Only</button> 318 + <button id="run-encoding" class="secondary" onclick="runEncodingTests()">Encoding Only</button> 319 + <select id="mode-select"> 320 + <option value="js">JavaScript (js_of_ocaml)</option> 321 + <option value="wasm">WebAssembly (wasm_of_ocaml)</option> 322 + </select> 323 + </div> 324 + </div> 325 + 326 + <div class="summary-grid" id="summary" style="display: none;"> 327 + <div class="summary-card"> 328 + <h3>Total Tests</h3> 329 + <div class="value neutral" id="total-count">0</div> 330 + </div> 331 + <div class="summary-card"> 332 + <h3>Passed</h3> 333 + <div class="value success" id="passed-count">0</div> 334 + </div> 335 + <div class="summary-card"> 336 + <h3>Failed</h3> 337 + <div class="value failure" id="failed-count">0</div> 338 + </div> 339 + <div class="summary-card"> 340 + <h3>Pass Rate</h3> 341 + <div class="value" id="pass-rate">0%</div> 342 + </div> 343 + </div> 344 + 345 + <div class="progress-container" id="progress-container"> 346 + <div class="status-text" id="status-text">Ready to run tests. Click a button above to start.</div> 347 + <div class="progress-bar"> 348 + <div class="progress-fill" id="progress-fill" style="width: 0%"></div> 349 + </div> 350 + </div> 351 + 352 + <div class="log-output" id="log-output">Waiting for tests to start...</div> 353 + 354 + <div class="filter-controls" id="filter-controls" style="display: none;"> 355 + <input type="search" id="search" placeholder="Search tests..."> 356 + <select id="filter"> 357 + <option value="all">All Tests</option> 358 + <option value="passed">Passed Only</option> 359 + <option value="failed">Failed Only</option> 360 + </select> 361 + <button class="secondary" onclick="expandAll()">Expand All</button> 362 + <button class="secondary" onclick="collapseAll()">Collapse All</button> 363 + </div> 364 + 365 + <div id="results-container"></div> 366 + </div> 367 + 368 + <script> 369 + // Test file lists 370 + const TREE_CONSTRUCTION_FILES = [ 371 + "adoption01.dat", "adoption02.dat", "blocks.dat", "comments01.dat", 372 + "doctype01.dat", "domjs-unsafe.dat", "entities01.dat", "entities02.dat", 373 + "foreign-fragment.dat", "html5test-com.dat", "inbody01.dat", "isindex.dat", 374 + "main-element.dat", "math.dat", "menuitem-element.dat", "namespace-sensitivity.dat", 375 + "noscript01.dat", "pending-spec-changes-plain-text-unsafe.dat", 376 + "pending-spec-changes.dat", "plain-text-unsafe.dat", "quirks01.dat", "ruby.dat", 377 + "scriptdata01.dat", "search-element.dat", "svg.dat", "tables01.dat", 378 + "template.dat", "tests_innerHTML_1.dat", "tests1.dat", "tests10.dat", 379 + "tests11.dat", "tests12.dat", "tests14.dat", "tests15.dat", "tests16.dat", 380 + "tests17.dat", "tests18.dat", "tests19.dat", "tests2.dat", "tests20.dat", 381 + "tests21.dat", "tests22.dat", "tests23.dat", "tests24.dat", "tests25.dat", 382 + "tests26.dat", "tests3.dat", "tests4.dat", "tests5.dat", "tests6.dat", 383 + "tests7.dat", "tests8.dat", "tests9.dat", "tricky01.dat", "webkit01.dat", 384 + "webkit02.dat" 385 + ]; 386 + 387 + const ENCODING_FILES = [ 388 + "test-yahoo-jp.dat", "tests1.dat", "tests2.dat" 389 + ]; 390 + 391 + let testRunner = null; 392 + let isRunning = false; 393 + 394 + function log(msg) { 395 + const output = document.getElementById('log-output'); 396 + output.textContent += msg + '\n'; 397 + output.scrollTop = output.scrollHeight; 398 + } 399 + 400 + function clearLog() { 401 + document.getElementById('log-output').textContent = ''; 402 + } 403 + 404 + function updateProgress(current, total, msg) { 405 + const pct = total > 0 ? (current / total * 100) : 0; 406 + document.getElementById('progress-fill').style.width = pct + '%'; 407 + document.getElementById('status-text').textContent = msg || `Running: ${current}/${total}`; 408 + } 409 + 410 + function updateSummary(passed, failed) { 411 + const total = passed + failed; 412 + const rate = total > 0 ? (passed / total * 100).toFixed(1) : 0; 413 + document.getElementById('total-count').textContent = total; 414 + document.getElementById('passed-count').textContent = passed; 415 + document.getElementById('failed-count').textContent = failed; 416 + document.getElementById('pass-rate').textContent = rate + '%'; 417 + document.getElementById('pass-rate').className = 'value ' + (rate >= 99 ? 'success' : rate >= 90 ? 'neutral' : 'failure'); 418 + document.getElementById('summary').style.display = 'grid'; 419 + } 420 + 421 + async function loadTestRunner() { 422 + const mode = document.getElementById('mode-select').value; 423 + const scriptName = mode === 'wasm' ? 'htmlrw-tests.wasm.js' : 'htmlrw-tests.js'; 424 + 425 + if (typeof html5rwTests !== 'undefined') { 426 + return true; 427 + } 428 + 429 + log(`Loading ${scriptName}...`); 430 + try { 431 + await new Promise((resolve, reject) => { 432 + const script = document.createElement('script'); 433 + script.src = `_build/default/lib/js/${scriptName}`; 434 + script.onload = resolve; 435 + script.onerror = () => reject(new Error(`Failed to load ${scriptName}`)); 436 + document.head.appendChild(script); 437 + }); 438 + 439 + // Wait for initialization 440 + await new Promise(resolve => setTimeout(resolve, 100)); 441 + 442 + if (typeof html5rwTests === 'undefined') { 443 + throw new Error('Test runner not initialized'); 444 + } 445 + 446 + log(`Test runner loaded (version ${html5rwTests.version})`); 447 + return true; 448 + } catch (e) { 449 + log(`ERROR: ${e.message}`); 450 + log('Make sure to run: opam exec -- dune build lib/js/htmlrw-tests.js'); 451 + return false; 452 + } 453 + } 454 + 455 + async function fetchTestFile(type, filename) { 456 + const basePath = type === 'tree-construction' 457 + ? 'html5lib-tests/tree-construction/' 458 + : 'html5lib-tests/encoding/'; 459 + const url = basePath + filename; 460 + const response = await fetch(url); 461 + if (!response.ok) { 462 + throw new Error(`Failed to fetch ${url}: ${response.status}`); 463 + } 464 + return await response.text(); 465 + } 466 + 467 + function renderFileResult(result) { 468 + const section = document.createElement('div'); 469 + section.className = 'results-section'; 470 + section.dataset.filename = result.filename; 471 + 472 + const collapsed = result.failedCount === 0 ? 'collapsed' : ''; 473 + const hidden = result.failedCount === 0 ? 'hidden' : ''; 474 + 475 + section.innerHTML = ` 476 + <div class="results-header ${collapsed}"> 477 + <h2><span class="toggle">โ–ผ</span> ${escapeHtml(result.filename)}</h2> 478 + <div class="results-stats"> 479 + <span class="passed">โœ“ ${result.passedCount}</span> 480 + <span class="failed">โœ— ${result.failedCount}</span> 481 + </div> 482 + </div> 483 + <div class="results-content ${hidden}"> 484 + ${result.tests.map(renderTestResult).join('')} 485 + </div> 486 + `; 487 + 488 + // Add toggle handler 489 + section.querySelector('.results-header').addEventListener('click', function() { 490 + this.classList.toggle('collapsed'); 491 + this.nextElementSibling.classList.toggle('hidden'); 492 + }); 493 + 494 + // Add test detail handlers 495 + section.querySelectorAll('.test-header').forEach(header => { 496 + header.addEventListener('click', function(e) { 497 + e.stopPropagation(); 498 + const details = this.nextElementSibling; 499 + details.classList.toggle('visible'); 500 + const icon = this.querySelector('.expand-icon'); 501 + icon.textContent = details.classList.contains('visible') ? 'โ–ฒ' : 'โ–ผ'; 502 + }); 503 + }); 504 + 505 + return section; 506 + } 507 + 508 + function renderTestResult(test) { 509 + const statusClass = test.success ? 'passed' : 'failed'; 510 + return ` 511 + <div class="test-item" data-passed="${test.success}"> 512 + <div class="test-header"> 513 + <div class="test-info"> 514 + <span class="status ${statusClass}"></span> 515 + <span class="test-num">#${test.testNum}</span> 516 + <span class="test-desc">${escapeHtml(test.description)}</span> 517 + </div> 518 + <span class="expand-icon">โ–ผ</span> 519 + </div> 520 + <div class="test-details"> 521 + <div class="detail-section"> 522 + <h4>Input</h4> 523 + <pre>${escapeHtml(test.input)}</pre> 524 + </div> 525 + <div class="detail-row"> 526 + <div class="detail-section"> 527 + <h4>Expected</h4> 528 + <pre>${escapeHtml(test.expected)}</pre> 529 + </div> 530 + <div class="detail-section"> 531 + <h4>Actual</h4> 532 + <pre>${escapeHtml(test.actual)}</pre> 533 + </div> 534 + </div> 535 + </div> 536 + </div> 537 + `; 538 + } 539 + 540 + function escapeHtml(str) { 541 + const div = document.createElement('div'); 542 + div.textContent = str; 543 + return div.innerHTML; 544 + } 545 + 546 + async function runTests(testType, files, basePath) { 547 + if (isRunning) return; 548 + isRunning = true; 549 + 550 + clearLog(); 551 + document.getElementById('results-container').innerHTML = ''; 552 + document.getElementById('filter-controls').style.display = 'none'; 553 + 554 + const buttons = document.querySelectorAll('button'); 555 + buttons.forEach(b => b.disabled = true); 556 + 557 + try { 558 + if (!await loadTestRunner()) { 559 + return; 560 + } 561 + 562 + log(`Starting ${testType} tests...`); 563 + let totalPassed = 0; 564 + let totalFailed = 0; 565 + const allResults = []; 566 + 567 + for (let i = 0; i < files.length; i++) { 568 + const filename = files[i]; 569 + updateProgress(i, files.length, `Loading ${filename}...`); 570 + 571 + try { 572 + const content = await fetchTestFile(basePath, filename); 573 + log(`Running ${filename}...`); 574 + 575 + let result; 576 + if (basePath === 'tree-construction') { 577 + result = html5rwTests.runTreeConstructionTest(filename, content); 578 + } else { 579 + result = html5rwTests.runEncodingTest(filename, content); 580 + } 581 + 582 + totalPassed += result.passedCount; 583 + totalFailed += result.failedCount; 584 + allResults.push(result); 585 + 586 + log(` ${filename}: ${result.passedCount} passed, ${result.failedCount} failed`); 587 + 588 + // Render result immediately 589 + const section = renderFileResult(result); 590 + document.getElementById('results-container').appendChild(section); 591 + 592 + } catch (e) { 593 + log(` ERROR loading ${filename}: ${e.message}`); 594 + } 595 + 596 + updateSummary(totalPassed, totalFailed); 597 + updateProgress(i + 1, files.length); 598 + } 599 + 600 + updateProgress(files.length, files.length, `Complete: ${totalPassed} passed, ${totalFailed} failed`); 601 + log(`\n=== SUMMARY ===`); 602 + log(`Total: ${totalPassed + totalFailed} tests`); 603 + log(`Passed: ${totalPassed}`); 604 + log(`Failed: ${totalFailed}`); 605 + log(`Pass rate: ${((totalPassed / (totalPassed + totalFailed)) * 100).toFixed(2)}%`); 606 + 607 + document.getElementById('filter-controls').style.display = 'flex'; 608 + setupFilters(); 609 + 610 + } finally { 611 + isRunning = false; 612 + buttons.forEach(b => b.disabled = false); 613 + } 614 + } 615 + 616 + function runAllTests() { 617 + // Run both tree and encoding tests 618 + runTests('all', TREE_CONSTRUCTION_FILES.concat(ENCODING_FILES.map(f => 'encoding/' + f)), 'tree-construction'); 619 + } 620 + 621 + async function runTreeTests() { 622 + await runTests('tree-construction', TREE_CONSTRUCTION_FILES, 'tree-construction'); 623 + } 624 + 625 + async function runEncodingTests() { 626 + await runTests('encoding', ENCODING_FILES, 'encoding'); 627 + } 628 + 629 + function setupFilters() { 630 + const search = document.getElementById('search'); 631 + const filter = document.getElementById('filter'); 632 + 633 + search.addEventListener('input', applyFilters); 634 + filter.addEventListener('change', applyFilters); 635 + } 636 + 637 + function applyFilters() { 638 + const query = document.getElementById('search').value.toLowerCase(); 639 + const filterValue = document.getElementById('filter').value; 640 + 641 + document.querySelectorAll('.test-item').forEach(item => { 642 + const text = item.textContent.toLowerCase(); 643 + const passed = item.dataset.passed === 'true'; 644 + let visible = true; 645 + 646 + if (query && !text.includes(query)) visible = false; 647 + if (filterValue === 'passed' && !passed) visible = false; 648 + if (filterValue === 'failed' && passed) visible = false; 649 + 650 + item.style.display = visible ? '' : 'none'; 651 + }); 652 + } 653 + 654 + function expandAll() { 655 + document.querySelectorAll('.results-header.collapsed').forEach(h => h.click()); 656 + } 657 + 658 + function collapseAll() { 659 + document.querySelectorAll('.results-header:not(.collapsed)').forEach(h => h.click()); 660 + } 661 + 662 + // Quick test on load 663 + window.addEventListener('load', function() { 664 + log('Ready. Select a test mode and click Run to begin.'); 665 + }); 666 + </script> 667 + </body> 668 + </html>