OCaml HTML5 parser/serialiser based on Python's JustHTML

validator

Changed files
+12649 -43
bin
html5check
lib
html5_checker
content_model
datatype
semantic
specialized
html5rw
test
+4
bin/html5check/dune
··· 1 + (executable 2 + (name html5check) 3 + (public_name html5check) 4 + (libraries html5_checker html5rw bytesrw cmdliner))
+168
bin/html5check/html5check.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: MIT 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** html5check - HTML5 conformance checker CLI 7 + 8 + Command line interface for validating HTML5 documents. *) 9 + 10 + open Cmdliner 11 + 12 + let version = "0.1.0" 13 + 14 + (** Exit codes *) 15 + module Exit_code = struct 16 + let ok = Cmd.Exit.ok 17 + let validation_errors = 1 18 + let io_error = 2 19 + end 20 + 21 + (** Read input from file or stdin *) 22 + let read_input file = 23 + try 24 + let ic = 25 + if file = "-" then stdin 26 + else open_in file 27 + in 28 + let reader = Bytesrw.Bytes.Reader.of_in_channel ic in 29 + Ok (reader, ic, file) 30 + with 31 + | Sys_error msg -> 32 + Error (`Io_error (Printf.sprintf "Cannot read file '%s': %s" file msg)) 33 + 34 + (** Format output based on the requested format *) 35 + let format_output format result = 36 + match format with 37 + | `Text -> Html5_checker.format_text result 38 + | `Json -> Html5_checker.format_json result 39 + | `Gnu -> Html5_checker.format_gnu result 40 + 41 + (** Run the validation *) 42 + let run format errors_only exit_zero quiet verbose file = 43 + match read_input file with 44 + | Error (`Io_error msg) -> 45 + if not quiet then Printf.eprintf "Error: %s\n" msg; 46 + Exit_code.io_error 47 + | Ok (reader, ic, system_id) -> 48 + (* Run validation *) 49 + let result = Html5_checker.check ~system_id reader in 50 + 51 + (* Close input if it's not stdin *) 52 + if file <> "-" then close_in ic; 53 + 54 + (* Get messages based on filtering *) 55 + let messages = 56 + if errors_only then Html5_checker.errors result 57 + else Html5_checker.messages result 58 + in 59 + 60 + (* Output based on mode *) 61 + if quiet then begin 62 + (* Only show counts *) 63 + let error_count = List.length (Html5_checker.errors result) in 64 + let warning_count = List.length (Html5_checker.warnings result) in 65 + if errors_only then 66 + Printf.printf "%d error%s\n" error_count (if error_count = 1 then "" else "s") 67 + else 68 + Printf.printf "%d error%s, %d warning%s\n" 69 + error_count (if error_count = 1 then "" else "s") 70 + warning_count (if warning_count = 1 then "" else "s") 71 + end else begin 72 + (* Format and print messages *) 73 + let output = format_output format result in 74 + if output <> "" then print_string output; 75 + 76 + (* Show summary if verbose *) 77 + if verbose && messages <> [] then begin 78 + let error_count = List.length (Html5_checker.errors result) in 79 + let warning_count = List.length (Html5_checker.warnings result) in 80 + Printf.eprintf "\nSummary: %d error%s, %d warning%s\n" 81 + error_count (if error_count = 1 then "" else "s") 82 + warning_count (if warning_count = 1 then "" else "s") 83 + end 84 + end; 85 + 86 + (* Determine exit code *) 87 + if exit_zero || not (Html5_checker.has_errors result) then 88 + Exit_code.ok 89 + else 90 + Exit_code.validation_errors 91 + 92 + (** Command line argument definitions *) 93 + 94 + let format_arg = 95 + let formats = [("text", `Text); ("json", `Json); ("gnu", `Gnu)] in 96 + let doc = 97 + "Output format. $(docv) must be one of $(b,text) (human-readable, default), \ 98 + $(b,json) (Nu validator compatible JSON), or $(b,gnu) (GNU-style for IDE integration)." 99 + in 100 + Arg.(value & opt (enum formats) `Text & info ["format"] ~docv:"FORMAT" ~doc) 101 + 102 + let errors_only_arg = 103 + let doc = "Only show errors (suppress warnings)." in 104 + Arg.(value & flag & info ["errors-only"] ~doc) 105 + 106 + let exit_zero_arg = 107 + let doc = 108 + "Always exit with status code 0, even if validation errors are found. \ 109 + Useful for CI pipelines where you want to collect validation results \ 110 + but not fail the build." 111 + in 112 + Arg.(value & flag & info ["exit-zero"] ~doc) 113 + 114 + let quiet_arg = 115 + let doc = "Quiet mode - only show error and warning counts, no details." in 116 + Arg.(value & flag & info ["q"; "quiet"] ~doc) 117 + 118 + let verbose_arg = 119 + let doc = "Verbose mode - show additional information including summary." in 120 + Arg.(value & flag & info ["v"; "verbose"] ~doc) 121 + 122 + let file_arg = 123 + let doc = 124 + "HTML file to validate. Use $(b,-) to read from standard input. \ 125 + If no file is specified, reads from stdin." 126 + in 127 + Arg.(value & pos 0 string "-" & info [] ~docv:"FILE" ~doc) 128 + 129 + let cmd = 130 + let doc = "validate HTML5 documents for conformance" in 131 + let man = [ 132 + `S Manpage.s_description; 133 + `P "$(tname) validates HTML5 documents against the WHATWG HTML5 specification. \ 134 + It reports parse errors, structural validation issues, and conformance problems."; 135 + `P "The validator checks for:"; 136 + `I ("Parse errors", "Malformed HTML syntax according to the WHATWG specification"); 137 + `I ("Content model violations", "Elements in invalid parent/child relationships"); 138 + `I ("Attribute errors", "Invalid or missing required attributes"); 139 + `I ("Structural issues", "Other conformance problems"); 140 + `S Manpage.s_options; 141 + `S "OUTPUT FORMATS"; 142 + `P "The validator supports three output formats:"; 143 + `I ("$(b,text)", "Human-readable format showing file:line:col: severity: message"); 144 + `I ("$(b,json)", "JSON format compatible with the Nu Html Checker (v.Nu)"); 145 + `I ("$(b,gnu)", "GNU-style format for IDE integration (file:line:column: message)"); 146 + `S "EXIT STATUS"; 147 + `P "The validator exits with one of the following status codes:"; 148 + `I ("0", "No validation errors found (or --exit-zero was specified)"); 149 + `I ("1", "Validation errors were found"); 150 + `I ("2", "File not found or I/O error"); 151 + `S Manpage.s_examples; 152 + `P "Validate a file:"; 153 + `Pre " $(mname) index.html"; 154 + `P "Validate from stdin:"; 155 + `Pre " cat page.html | $(mname) -"; 156 + `P "Show only errors in JSON format:"; 157 + `Pre " $(mname) --format=json --errors-only page.html"; 158 + `P "Quiet mode for CI:"; 159 + `Pre " $(mname) --quiet --exit-zero index.html"; 160 + `S Manpage.s_bugs; 161 + `P "Report bugs at https://tangled.org/@anil.recoil.org/ocaml-html5rw/issues"; 162 + ] in 163 + let info = Cmd.info "html5check" ~version ~doc ~man in 164 + Cmd.v info Term.(const run $ format_arg $ errors_only_arg $ exit_zero_arg 165 + $ quiet_arg $ verbose_arg $ file_arg) 166 + 167 + let main () = Cmd.eval' cmd 168 + let () = Stdlib.exit (main ())
+1 -1
dune
··· 1 - (data_only_dirs third_party) 1 + (vendored_dirs third_party)
+2 -1
dune-project
··· 25 25 (uutf (>= 1.0.0)) 26 26 (uuuu (>= 0.3.0)) 27 27 (odoc :with-doc) 28 - (jsont (>= 0.2.0)))) 28 + (jsont (>= 0.2.0)) 29 + (cmdliner (>= 1.3.0))))
+1
html5rw.opam
··· 17 17 "uuuu" {>= "0.3.0"} 18 18 "odoc" {with-doc} 19 19 "jsont" {>= "0.2.0"} 20 + "cmdliner" {>= "1.3.0"} 20 21 ] 21 22 build: [ 22 23 ["dune" "subst"] {dev}
+39
lib/html5_checker/checker.ml
··· 1 + (** Base checker module for HTML5 conformance checking. *) 2 + 3 + module type S = sig 4 + type state 5 + 6 + val create : unit -> state 7 + val reset : state -> unit 8 + 9 + val start_element : 10 + state -> 11 + name:string -> 12 + namespace:string option -> 13 + attrs:(string * string) list -> 14 + Message_collector.t -> 15 + unit 16 + 17 + val end_element : 18 + state -> name:string -> namespace:string option -> Message_collector.t -> unit 19 + 20 + val characters : state -> string -> Message_collector.t -> unit 21 + val end_document : state -> Message_collector.t -> unit 22 + end 23 + 24 + type t = (module S) 25 + 26 + (** No-operation checker implementation. *) 27 + module Noop = struct 28 + type state = unit 29 + 30 + let create () = () 31 + let reset () = () 32 + 33 + let start_element () ~name:_ ~namespace:_ ~attrs:_ _ = () 34 + let end_element () ~name:_ ~namespace:_ _ = () 35 + let characters () _ _ = () 36 + let end_document () _ = () 37 + end 38 + 39 + let noop () = (module Noop : S)
+177
lib/html5_checker/checker.mli
··· 1 + (** Base checker module for HTML5 conformance checking. 2 + 3 + This module provides the core checker abstraction used throughout the 4 + html5_checker library. A checker validates HTML5 documents by observing 5 + DOM tree traversal events and emitting validation messages. 6 + 7 + {2 Design Overview} 8 + 9 + Checkers follow a SAX-like event model where they receive notifications 10 + about elements, text, and document boundaries as a DOM tree is traversed. 11 + This design allows for: 12 + 13 + - {b Stateful validation}: Each checker maintains its own state across 14 + multiple events 15 + - {b Composability}: Multiple checkers can validate the same document 16 + simultaneously 17 + - {b Efficiency}: DOM traversal happens once regardless of checker count 18 + 19 + {2 Checker Lifecycle} 20 + 21 + A checker progresses through these phases: 22 + 23 + 1. {b Creation}: Initialize with {!create} to set up initial state 24 + 2. {b Traversal}: Receive {!start_element}, {!characters}, and 25 + {!end_element} events as the DOM is walked 26 + 3. {b Completion}: Finalize validation with {!end_document} 27 + 4. {b Reset} (optional): Return to initial state with {!reset} 28 + 29 + {3 Event Sequence} 30 + 31 + For a document like [<p>Hello <b>world</b></p>], the event sequence is: 32 + 33 + {v 34 + start_element "p" 35 + characters "Hello " 36 + start_element "b" 37 + characters "world" 38 + end_element "b" 39 + end_element "p" 40 + end_document 41 + v} 42 + 43 + {2 First-Class Modules} 44 + 45 + Checkers are represented as first-class modules implementing the {!S} 46 + signature. This allows: 47 + 48 + - Dynamic checker registration and discovery 49 + - Heterogeneous collections of checkers 50 + - Checker selection at runtime based on validation requirements 51 + 52 + @see <https://v2.ocaml.org/manual/firstclassmodules.html> 53 + OCaml manual: First-class modules 54 + *) 55 + 56 + (** {1 Module Signature} *) 57 + 58 + (** The signature that all checker modules must implement. 59 + 60 + A checker module maintains validation state and receives notifications 61 + about DOM tree traversal events. *) 62 + module type S = sig 63 + (** The type of checker state. 64 + 65 + This is an abstract type that holds the checker's internal validation 66 + state. Different checkers will have different state representations 67 + depending on what they need to track during validation. *) 68 + type state 69 + 70 + (** {1 Lifecycle Operations} *) 71 + 72 + val create : unit -> state 73 + (** [create ()] initializes a new checker state. 74 + 75 + This function sets up the initial state needed for validation, 76 + such as empty stacks for context tracking, counters, or lookup 77 + tables. *) 78 + 79 + val reset : state -> unit 80 + (** [reset state] resets the checker to its initial state. 81 + 82 + This allows reusing a checker for multiple documents without 83 + reallocating. After reset, the checker behaves as if freshly 84 + created with {!create}. *) 85 + 86 + (** {1 DOM Traversal Events} *) 87 + 88 + val start_element : 89 + state -> 90 + name:string -> 91 + namespace:string option -> 92 + attrs:(string * string) list -> 93 + Message_collector.t -> 94 + unit 95 + (** [start_element state ~name ~namespace ~attrs collector] is called when 96 + entering an element during DOM traversal. 97 + 98 + @param state The checker state 99 + @param name The element tag name (e.g., "div", "p", "span") 100 + @param namespace The element namespace ([None] for HTML, [Some "svg"] 101 + for SVG, [Some "mathml"] for MathML) 102 + @param attrs The element's attributes as [(name, value)] pairs 103 + @param collector The message collector for emitting validation messages 104 + 105 + This is where checkers can validate: 106 + - Whether the element is allowed in the current context 107 + - Whether required attributes are present 108 + - Whether attribute values are valid 109 + - Whether the element opens a new validation context *) 110 + 111 + val end_element : 112 + state -> name:string -> namespace:string option -> Message_collector.t -> unit 113 + (** [end_element state ~name ~namespace collector] is called when exiting 114 + an element during DOM traversal. 115 + 116 + @param state The checker state 117 + @param name The element tag name 118 + @param namespace The element namespace 119 + @param collector The message collector for emitting validation messages 120 + 121 + This is where checkers can: 122 + - Pop validation contexts from stacks 123 + - Validate that required child elements were present 124 + - Emit messages about element-scoped validation rules *) 125 + 126 + val characters : state -> string -> Message_collector.t -> unit 127 + (** [characters state text collector] is called when text content is 128 + encountered during DOM traversal. 129 + 130 + @param state The checker state 131 + @param text The text content 132 + @param collector The message collector for emitting validation messages 133 + 134 + This is where checkers can validate: 135 + - Whether text is allowed in the current context 136 + - Whether text content follows specific patterns 137 + - Whether text matches expected formats *) 138 + 139 + val end_document : state -> Message_collector.t -> unit 140 + (** [end_document state collector] is called after the entire DOM tree has 141 + been traversed. 142 + 143 + @param state The checker state 144 + @param collector The message collector for emitting validation messages 145 + 146 + This is where checkers can: 147 + - Emit messages about missing required elements 148 + - Validate document-level constraints 149 + - Check that all opened contexts were properly closed 150 + - Report any accumulated validation failures *) 151 + end 152 + 153 + (** {1 Checker Values} *) 154 + 155 + (** The type of a checker value. 156 + 157 + This is a packed first-class module containing both the checker 158 + implementation and its state. It enables storing heterogeneous 159 + checkers in collections and passing them around dynamically. *) 160 + type t = (module S) 161 + 162 + (** {1 Built-in Checkers} *) 163 + 164 + val noop : unit -> t 165 + (** [noop ()] creates a no-operation checker that performs no validation. 166 + 167 + This checker ignores all events and never emits messages. It is useful: 168 + - As a placeholder in checker registries 169 + - For testing checker infrastructure 170 + - As a base for building new checkers 171 + 172 + {b Example:} 173 + {[ 174 + let checker = noop () in 175 + (* Does nothing when walked over a DOM tree *) 176 + ]} 177 + *)
+22
lib/html5_checker/checker_registry.ml
··· 1 + (** Registry for HTML5 conformance checkers. *) 2 + 3 + type t = (string, Checker.t) Hashtbl.t 4 + 5 + let create () = Hashtbl.create 16 6 + 7 + let default () = 8 + (* In Phase 1, return an empty registry. 9 + Built-in checkers will be added in later phases. *) 10 + create () 11 + 12 + let register registry name checker = Hashtbl.replace registry name checker 13 + 14 + let unregister registry name = Hashtbl.remove registry name 15 + 16 + let get registry name = Hashtbl.find_opt registry name 17 + 18 + let list_names registry = 19 + Hashtbl.to_seq_keys registry |> List.of_seq 20 + 21 + let all registry = 22 + Hashtbl.to_seq_values registry |> List.of_seq
+156
lib/html5_checker/checker_registry.mli
··· 1 + (** Registry for HTML5 conformance checkers. 2 + 3 + This module provides a dynamic registry for managing collections of 4 + checkers. It enables: 5 + 6 + - {b Registration}: Add checkers under descriptive names 7 + - {b Discovery}: Retrieve checkers by name or list all available ones 8 + - {b Lifecycle management}: Register and unregister checkers at runtime 9 + - {b Defaults}: Access a pre-configured set of built-in checkers 10 + 11 + {2 Design Rationale} 12 + 13 + The registry pattern separates checker implementation from checker usage. 14 + Applications can: 15 + 16 + 1. Query available checkers to present options to users 17 + 2. Select specific checkers based on validation requirements 18 + 3. Add custom checkers without modifying library code 19 + 4. Share checker configurations across validation runs 20 + 21 + {2 Usage Pattern} 22 + 23 + {[ 24 + (* Start with default checkers *) 25 + let reg = default () in 26 + 27 + (* Add a custom checker *) 28 + let my_checker = (module MyChecker : Checker.S) in 29 + register reg "my-custom-check" my_checker; 30 + 31 + (* List all available checkers *) 32 + let names = list_names reg in 33 + List.iter (Printf.printf "Available: %s\n") names; 34 + 35 + (* Retrieve a specific checker *) 36 + match get reg "my-custom-check" with 37 + | Some checker -> (* Use the checker *) 38 + | None -> (* Not found *) 39 + 40 + (* Get all checkers for validation *) 41 + let all_checkers = all reg in 42 + (* Pass to dom_walker *) 43 + ]} 44 + 45 + {2 Thread Safety} 46 + 47 + This registry is not thread-safe. If shared across threads, external 48 + synchronization is required. *) 49 + 50 + (** {1 Types} *) 51 + 52 + (** The type of a checker registry. 53 + 54 + This is an opaque type representing a mutable collection of named 55 + checkers. Internally implemented as a hash table for efficient lookups. *) 56 + type t 57 + 58 + (** {1 Creation} *) 59 + 60 + val create : unit -> t 61 + (** [create ()] creates a new empty checker registry. 62 + 63 + Use this when you want to build a custom set of checkers from scratch, 64 + without any defaults. *) 65 + 66 + val default : unit -> t 67 + (** [default ()] creates a registry with built-in checkers. 68 + 69 + The default registry is initially empty but serves as a starting point 70 + for adding standard validation checkers in future phases. 71 + 72 + Built-in checkers will include: 73 + - Document structure validation 74 + - Attribute validation 75 + - Content model checking 76 + - Accessibility checks 77 + 78 + Note: In Phase 1, the default registry is empty. Built-in checkers 79 + will be added in subsequent phases. *) 80 + 81 + (** {1 Registration} *) 82 + 83 + val register : t -> string -> Checker.t -> unit 84 + (** [register registry name checker] adds a checker to the registry. 85 + 86 + @param registry The registry to add to 87 + @param name A unique identifier for the checker (e.g., "obsolete-elements", 88 + "required-attributes") 89 + @param checker The checker implementation 90 + 91 + If a checker with the same name already exists, it is replaced. 92 + 93 + {b Example:} 94 + {[ 95 + let reg = create () in 96 + let checker = (module MyChecker : Checker.S) in 97 + register reg "my-check" checker 98 + ]} *) 99 + 100 + val unregister : t -> string -> unit 101 + (** [unregister registry name] removes a checker from the registry. 102 + 103 + @param registry The registry to remove from 104 + @param name The checker name 105 + 106 + If no checker with the given name exists, this is a no-op. *) 107 + 108 + (** {1 Retrieval} *) 109 + 110 + val get : t -> string -> Checker.t option 111 + (** [get registry name] retrieves a checker by name. 112 + 113 + @param registry The registry to search 114 + @param name The checker name 115 + @return [Some checker] if found, [None] otherwise 116 + 117 + {b Example:} 118 + {[ 119 + match get reg "obsolete-elements" with 120 + | Some checker -> (* Use checker *) 121 + | None -> (* Checker not registered *) 122 + ]} *) 123 + 124 + val list_names : t -> string list 125 + (** [list_names registry] returns all registered checker names. 126 + 127 + @param registry The registry to query 128 + @return A list of all checker names in arbitrary order 129 + 130 + This is useful for: 131 + - Displaying available checkers to users 132 + - Debugging registry contents 133 + - Iterating over specific subsets of checkers 134 + 135 + {b Example:} 136 + {[ 137 + let names = list_names reg in 138 + Printf.printf "Available checkers: %s\n" 139 + (String.concat ", " names) 140 + ]} *) 141 + 142 + val all : t -> Checker.t list 143 + (** [all registry] returns all registered checkers. 144 + 145 + @param registry The registry to query 146 + @return A list of all checkers in arbitrary order 147 + 148 + This is the primary way to retrieve checkers for validation. 149 + Pass the result to {!Dom_walker.walk_all} to run all registered 150 + checkers on a DOM tree. 151 + 152 + {b Example:} 153 + {[ 154 + let checkers = all reg in 155 + Dom_walker.walk_all checkers collector dom 156 + ]} *)
+25
lib/html5_checker/content_model/attr_spec.ml
··· 1 + type t = { 2 + name : string; 3 + required : bool; 4 + datatype : string option; 5 + enum_values : string list option; 6 + deprecated : bool; 7 + } 8 + 9 + let make name ?(required = false) ?datatype ?enum_values ?(deprecated = false) () = 10 + { name; required; datatype; enum_values; deprecated } 11 + 12 + let pp fmt t = 13 + Format.fprintf fmt "@[<v 2>{ name = %S;@ required = %b;@ " t.name t.required; 14 + (match t.datatype with 15 + | None -> Format.fprintf fmt "datatype = None;@ " 16 + | Some dt -> Format.fprintf fmt "datatype = Some %S;@ " dt); 17 + (match t.enum_values with 18 + | None -> Format.fprintf fmt "enum_values = None;@ " 19 + | Some vals -> 20 + Format.fprintf fmt "enum_values = Some [%a];@ " 21 + (Format.pp_print_list 22 + ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") 23 + (fun fmt s -> Format.fprintf fmt "%S" s)) 24 + vals); 25 + Format.fprintf fmt "deprecated = %b }@]" t.deprecated
+20
lib/html5_checker/content_model/attr_spec.mli
··· 1 + (** HTML5 attribute specifications. *) 2 + 3 + type t = { 4 + name : string; 5 + required : bool; 6 + datatype : string option; (** Datatype name for validation *) 7 + enum_values : string list option; (** Allowed values if enumerated *) 8 + deprecated : bool; 9 + } 10 + 11 + val make : 12 + string -> 13 + ?required:bool -> 14 + ?datatype:string -> 15 + ?enum_values:string list -> 16 + ?deprecated:bool -> 17 + unit -> 18 + t 19 + 20 + val pp : Format.formatter -> t -> unit
+26
lib/html5_checker/content_model/attribute_spec.ml
··· 1 + type requirement = Required | Optional 2 + 3 + type t = { 4 + name : string; 5 + requirement : requirement; 6 + datatype : string option; 7 + description : string; 8 + } 9 + 10 + let create ~name ?(requirement = Optional) ?datatype ~description () = 11 + { name; requirement; datatype; description } 12 + 13 + let requirement_to_string = function 14 + | Required -> "required" 15 + | Optional -> "optional" 16 + 17 + let to_string t = 18 + let req = requirement_to_string t.requirement in 19 + let dt = 20 + match t.datatype with 21 + | None -> "" 22 + | Some d -> Printf.sprintf " (%s)" d 23 + in 24 + Printf.sprintf "%s [%s]%s: %s" t.name req dt t.description 25 + 26 + let pp fmt t = Format.fprintf fmt "%s" (to_string t)
+40
lib/html5_checker/content_model/attribute_spec.mli
··· 1 + (** HTML5 attribute specifications. 2 + 3 + Defines attribute requirements and constraints for HTML5 elements. 4 + See https://html.spec.whatwg.org/multipage/indices.html#attributes-3 *) 5 + 6 + type requirement = 7 + | Required (** Attribute must be present *) 8 + | Optional (** Attribute may be present *) 9 + 10 + type t = { 11 + name : string; 12 + (** Attribute name *) 13 + 14 + requirement : requirement; 15 + (** Whether attribute is required or optional *) 16 + 17 + datatype : string option; 18 + (** Datatype validator name (e.g., "url", "integer", "boolean") *) 19 + 20 + description : string; 21 + (** Human-readable description *) 22 + } 23 + 24 + val create : 25 + name:string -> 26 + ?requirement:requirement -> 27 + ?datatype:string -> 28 + description:string -> 29 + unit -> 30 + t 31 + (** Create an attribute specification. 32 + 33 + @param name Attribute name 34 + @param requirement Whether required or optional (default: Optional) 35 + @param datatype Datatype validator name 36 + @param description Human-readable description *) 37 + 38 + val to_string : t -> string 39 + 40 + val pp : Format.formatter -> t -> unit
+39
lib/html5_checker/content_model/category.ml
··· 1 + type t = 2 + | Metadata 3 + | Flow 4 + | Sectioning 5 + | Heading 6 + | Phrasing 7 + | Embedded 8 + | Interactive 9 + | Palpable 10 + | Script_supporting 11 + | Form_associated 12 + | Listed 13 + | Labelable 14 + | Submittable 15 + | Resettable 16 + | Autocapitalize_inheriting 17 + | Transparent 18 + 19 + let to_string = function 20 + | Metadata -> "metadata" 21 + | Flow -> "flow" 22 + | Sectioning -> "sectioning" 23 + | Heading -> "heading" 24 + | Phrasing -> "phrasing" 25 + | Embedded -> "embedded" 26 + | Interactive -> "interactive" 27 + | Palpable -> "palpable" 28 + | Script_supporting -> "script-supporting" 29 + | Form_associated -> "form-associated" 30 + | Listed -> "listed" 31 + | Labelable -> "labelable" 32 + | Submittable -> "submittable" 33 + | Resettable -> "resettable" 34 + | Autocapitalize_inheriting -> "autocapitalize-inheriting" 35 + | Transparent -> "transparent" 36 + 37 + let compare (a : t) (b : t) = Stdlib.compare a b 38 + 39 + let equal (a : t) (b : t) = a = b
+70
lib/html5_checker/content_model/category.mli
··· 1 + (** HTML5 content categories. 2 + 3 + This module defines the content categories used in HTML5 to classify elements 4 + based on their characteristics and allowed contexts. Elements can belong to 5 + multiple categories. 6 + 7 + @see <https://html.spec.whatwg.org/multipage/dom.html#content-models> WHATWG HTML Specification *) 8 + 9 + (** Content category type. *) 10 + type t = 11 + | Metadata 12 + (** Metadata content sets up the presentation or behavior of the rest of 13 + the content, or sets up the relationship of the document with other 14 + documents, or conveys other "out of band" information. *) 15 + | Flow 16 + (** Most elements that are used in the body of documents and applications 17 + are categorized as flow content. *) 18 + | Sectioning 19 + (** Sectioning content is content that defines the scope of headings and 20 + footers. *) 21 + | Heading 22 + (** Heading content defines the heading of a section (whether explicitly 23 + marked up using sectioning content elements, or implied by the heading 24 + content itself). *) 25 + | Phrasing 26 + (** Phrasing content is the text of the document, as well as elements that 27 + mark up that text at the intra-paragraph level. *) 28 + | Embedded 29 + (** Embedded content is content that imports another resource into the 30 + document, or content from another vocabulary that is inserted into the 31 + document. *) 32 + | Interactive 33 + (** Interactive content is content that is specifically intended for user 34 + interaction. *) 35 + | Palpable 36 + (** As a general rule, elements whose content model allows any flow content 37 + or phrasing content should have at least one node in its contents that 38 + is palpable content and that does not have the hidden attribute specified. *) 39 + | Script_supporting 40 + (** Script-supporting elements are those that do not represent anything 41 + themselves (i.e., they are not rendered), but are used to support scripts. *) 42 + | Form_associated 43 + (** Form-associated elements can have a form owner. *) 44 + | Listed 45 + (** Listed form-associated elements have a form attribute that can point 46 + to a form element. *) 47 + | Labelable 48 + (** Labelable form-associated elements can be associated with label elements. *) 49 + | Submittable 50 + (** Submittable form-associated elements can be used for constructing the 51 + entry list when a form element is submitted. *) 52 + | Resettable 53 + (** Resettable form-associated elements are affected when a form element 54 + is reset. *) 55 + | Autocapitalize_inheriting 56 + (** Some elements inherit the autocapitalize attribute from their form owner. *) 57 + | Transparent 58 + (** Transparent content models adopt the content model of their parent 59 + element. *) 60 + 61 + (** {1 Predicates} *) 62 + 63 + val to_string : t -> string 64 + (** [to_string category] returns a string representation of the category. *) 65 + 66 + val compare : t -> t -> int 67 + (** [compare c1 c2] compares two categories for ordering. *) 68 + 69 + val equal : t -> t -> bool 70 + (** [equal c1 c2] returns [true] if the categories are equal. *)
+48
lib/html5_checker/content_model/content_category.ml
··· 1 + type t = 2 + | Metadata 3 + | Flow 4 + | Sectioning 5 + | Heading 6 + | Phrasing 7 + | Embedded 8 + | Interactive 9 + | Palpable 10 + | Script_supporting 11 + 12 + let to_string = function 13 + | Metadata -> "metadata" 14 + | Flow -> "flow" 15 + | Sectioning -> "sectioning" 16 + | Heading -> "heading" 17 + | Phrasing -> "phrasing" 18 + | Embedded -> "embedded" 19 + | Interactive -> "interactive" 20 + | Palpable -> "palpable" 21 + | Script_supporting -> "script-supporting" 22 + 23 + let of_string = function 24 + | "metadata" -> Some Metadata 25 + | "flow" -> Some Flow 26 + | "sectioning" -> Some Sectioning 27 + | "heading" -> Some Heading 28 + | "phrasing" -> Some Phrasing 29 + | "embedded" -> Some Embedded 30 + | "interactive" -> Some Interactive 31 + | "palpable" -> Some Palpable 32 + | "script-supporting" -> Some Script_supporting 33 + | _ -> None 34 + 35 + let pp fmt t = Format.fprintf fmt "%s" (to_string t) 36 + 37 + let all = 38 + [ 39 + Metadata; 40 + Flow; 41 + Sectioning; 42 + Heading; 43 + Phrasing; 44 + Embedded; 45 + Interactive; 46 + Palpable; 47 + Script_supporting; 48 + ]
+23
lib/html5_checker/content_model/content_category.mli
··· 1 + (** HTML5 content categories. 2 + 3 + Elements are categorized for determining valid parent-child relationships. 4 + See https://html.spec.whatwg.org/multipage/dom.html#content-models *) 5 + 6 + type t = 7 + | Metadata (** head, title, meta, link, style, script, noscript, base *) 8 + | Flow (** Most body elements *) 9 + | Sectioning (** article, aside, nav, section *) 10 + | Heading (** h1-h6, hgroup *) 11 + | Phrasing (** Inline elements: a, em, strong, span, etc. *) 12 + | Embedded (** img, video, audio, canvas, iframe, embed, object *) 13 + | Interactive (** a (with href), button, input, select, textarea, details *) 14 + | Palpable (** Elements with renderable content *) 15 + | Script_supporting (** script, template *) 16 + 17 + val to_string : t -> string 18 + 19 + val of_string : string -> t option 20 + 21 + val pp : Format.formatter -> t -> unit 22 + 23 + val all : t list
+215
lib/html5_checker/content_model/content_checker.ml
··· 1 + type element_context = { 2 + name : string; 3 + spec : Element_spec.t; 4 + children_count : int; 5 + } 6 + 7 + type state = { 8 + registry : Element_registry.t; 9 + mutable ancestor_stack : element_context list; 10 + } 11 + 12 + let create_with_registry ?(registry = Element_registry.default ()) _collector = 13 + { registry; ancestor_stack = [] } 14 + 15 + let create () = create_with_registry (Message_collector.create ()) 16 + 17 + let reset state = 18 + state.ancestor_stack <- [] 19 + 20 + (* Check if an element name matches a content model *) 21 + let rec matches_content_model registry element_name content_model = 22 + match content_model with 23 + | Content_model.Nothing -> false 24 + | Content_model.Text -> false (* Text, not element *) 25 + | Content_model.Transparent -> true (* Inherits parent, allow for now *) 26 + | Content_model.Categories cats -> ( 27 + match Element_registry.get registry element_name with 28 + | None -> false 29 + | Some spec -> 30 + List.exists (fun cat -> Element_spec.has_category spec cat) cats) 31 + | Content_model.Elements names -> 32 + List.mem (String.lowercase_ascii element_name) 33 + (List.map String.lowercase_ascii names) 34 + | Content_model.Mixed cats -> ( 35 + match Element_registry.get registry element_name with 36 + | None -> false 37 + | Some spec -> 38 + List.exists (fun cat -> Element_spec.has_category spec cat) cats) 39 + | Content_model.One_or_more model -> matches_content_model registry element_name model 40 + | Content_model.Zero_or_more model -> matches_content_model registry element_name model 41 + | Content_model.Optional model -> matches_content_model registry element_name model 42 + | Content_model.Sequence models -> 43 + (* For sequences, allow any of the models for now (simplified) *) 44 + List.exists (matches_content_model registry element_name) models 45 + | Content_model.Choice models -> 46 + List.exists (matches_content_model registry element_name) models 47 + | Content_model.Except (model, excluded_cats) -> ( 48 + match Element_registry.get registry element_name with 49 + | None -> matches_content_model registry element_name model 50 + | Some spec -> 51 + matches_content_model registry element_name model 52 + && not (List.exists (fun cat -> Element_spec.has_category spec cat) excluded_cats)) 53 + 54 + (* Check if text is allowed in a content model *) 55 + let rec allows_text content_model = 56 + match content_model with 57 + | Content_model.Nothing -> false 58 + | Content_model.Text -> true 59 + | Content_model.Transparent -> true (* Inherits parent *) 60 + | Content_model.Categories _ -> false (* Elements only *) 61 + | Content_model.Elements _ -> false (* Specific elements only *) 62 + | Content_model.Mixed _ -> true (* Text + elements *) 63 + | Content_model.One_or_more model -> allows_text model 64 + | Content_model.Zero_or_more model -> allows_text model 65 + | Content_model.Optional model -> allows_text model 66 + | Content_model.Sequence models -> List.exists allows_text models 67 + | Content_model.Choice models -> List.exists allows_text models 68 + | Content_model.Except (model, _) -> allows_text model 69 + 70 + (* Check for prohibited ancestors *) 71 + let check_prohibited_ancestors state name spec collector = 72 + List.iter 73 + (fun prohibited -> 74 + if List.exists (fun ctx -> String.equal ctx.name prohibited) state.ancestor_stack then 75 + Message_collector.add_error collector 76 + ~message:(Printf.sprintf "Element '%s' cannot be nested inside '%s'" name prohibited) 77 + ~code:"prohibited-ancestor" 78 + ~element:name 79 + ()) 80 + spec.Element_spec.prohibited_ancestors 81 + 82 + (* Validate that a child element is allowed *) 83 + let validate_child_element state child_name collector = 84 + match state.ancestor_stack with 85 + | [] -> 86 + (* Root level - only html allowed *) 87 + if not (String.equal (String.lowercase_ascii child_name) "html") then 88 + Message_collector.add_error collector 89 + ~message:(Printf.sprintf "Element '%s' not allowed at document root (only 'html' allowed)" child_name) 90 + ~code:"invalid-root-element" 91 + ~element:child_name 92 + () 93 + | parent :: _ -> 94 + let content_model = parent.spec.Element_spec.content_model in 95 + if not (matches_content_model state.registry child_name content_model) then 96 + Message_collector.add_error collector 97 + ~message:(Printf.sprintf 98 + "Element '%s' not allowed as child of '%s' (content model: %s)" 99 + child_name 100 + parent.name 101 + (Content_model.to_string content_model)) 102 + ~code:"invalid-child-element" 103 + ~element:child_name 104 + () 105 + 106 + let start_element state ~name ~namespace:_ ~attrs:_ collector = 107 + (* Look up element specification *) 108 + let spec_opt = Element_registry.get state.registry name in 109 + 110 + match spec_opt with 111 + | None -> 112 + (* Unknown element - emit warning *) 113 + Message_collector.add_warning collector 114 + ~message:(Printf.sprintf "Unknown element '%s'" name) 115 + ~code:"unknown-element" 116 + ~element:name 117 + () 118 + | Some spec -> 119 + (* Check prohibited ancestors *) 120 + check_prohibited_ancestors state name spec collector; 121 + 122 + (* Validate this element is allowed as child of parent *) 123 + validate_child_element state name collector; 124 + 125 + (* Push element context onto stack *) 126 + let context = { name; spec; children_count = 0 } in 127 + state.ancestor_stack <- context :: state.ancestor_stack 128 + 129 + let end_element state ~name ~namespace:_ collector = 130 + match state.ancestor_stack with 131 + | [] -> 132 + (* Unmatched closing tag *) 133 + Message_collector.add_error collector 134 + ~message:(Printf.sprintf "Unmatched closing tag '%s'" name) 135 + ~code:"unmatched-closing-tag" 136 + ~element:name 137 + () 138 + | context :: rest -> 139 + if not (String.equal context.name name) then 140 + (* Mismatched tag *) 141 + Message_collector.add_error collector 142 + ~message:(Printf.sprintf "Expected closing tag '%s' but got '%s'" context.name name) 143 + ~code:"mismatched-closing-tag" 144 + ~element:name 145 + () 146 + else ( 147 + (* Check if void element has children *) 148 + if Element_spec.is_void context.spec && context.children_count > 0 then 149 + Message_collector.add_error collector 150 + ~message:(Printf.sprintf "Void element '%s' must not have children" name) 151 + ~code:"void-element-has-children" 152 + ~element:name 153 + (); 154 + 155 + (* Pop stack *) 156 + state.ancestor_stack <- rest; 157 + 158 + (* If there's a parent, increment its child count *) 159 + match rest with 160 + | [] -> () 161 + | parent :: rest_tail -> 162 + let updated_parent = { parent with children_count = parent.children_count + 1 } in 163 + state.ancestor_stack <- updated_parent :: rest_tail) 164 + 165 + let characters state text collector = 166 + (* Check if text is allowed in current context *) 167 + match state.ancestor_stack with 168 + | [] -> 169 + (* Text at root level - only whitespace allowed *) 170 + if not (String.trim text = "") then 171 + Message_collector.add_error collector 172 + ~message:"Text content not allowed at document root" 173 + ~code:"text-at-root" 174 + () 175 + | parent :: rest -> 176 + let content_model = parent.spec.Element_spec.content_model in 177 + if not (allows_text content_model) then 178 + (* Only report if non-whitespace text *) 179 + if not (String.trim text = "") then 180 + Message_collector.add_error collector 181 + ~message:(Printf.sprintf 182 + "Text content not allowed in '%s' (content model: %s)" 183 + parent.name 184 + (Content_model.to_string content_model)) 185 + ~code:"text-not-allowed" 186 + ~element:parent.name 187 + () 188 + else ( 189 + (* Text is allowed, increment child count *) 190 + let updated_parent = { parent with children_count = parent.children_count + 1 } in 191 + state.ancestor_stack <- updated_parent :: rest) 192 + 193 + let end_document state collector = 194 + (* Check for unclosed elements *) 195 + List.iter 196 + (fun context -> 197 + Message_collector.add_error collector 198 + ~message:(Printf.sprintf "Unclosed element '%s'" context.name) 199 + ~code:"unclosed-element" 200 + ~element:context.name 201 + ()) 202 + state.ancestor_stack 203 + 204 + (* Package as first-class module *) 205 + let checker = 206 + (module struct 207 + type nonrec state = state 208 + 209 + let create = create 210 + let reset = reset 211 + let start_element = start_element 212 + let end_element = end_element 213 + let characters = characters 214 + let end_document = end_document 215 + end : Checker.S)
+57
lib/html5_checker/content_model/content_checker.mli
··· 1 + (** Content model checker. 2 + 3 + Validates that HTML elements conform to their content model specifications. 4 + 5 + The content model checker performs structural validation of HTML documents 6 + by ensuring that: 7 + 8 + - Element children match the element's declared content model 9 + - No prohibited ancestor relationships exist (e.g., no [<a>] inside [<a>]) 10 + - Void elements contain no children 11 + - Required children are present where mandated 12 + 13 + {2 Content Model Validation} 14 + 15 + The checker validates content models by: 16 + 17 + 1. Looking up the element specification in the registry 18 + 2. Checking each child element or text node against the content model 19 + 3. Tracking the ancestor stack to detect prohibited relationships 20 + 4. Emitting appropriate errors or warnings for violations 21 + 22 + {2 Usage Example} 23 + 24 + {[ 25 + let checker = Content_checker.create (Message_collector.create ()) in 26 + let module C = (val checker : Checker.S) in 27 + let state = C.create () in 28 + 29 + (* Walk the DOM tree *) 30 + C.start_element state ~name:"div" ~namespace:None ~attrs:[] collector; 31 + C.characters state "Hello, world!" collector; 32 + C.end_element state ~name:"div" ~namespace:None collector; 33 + C.end_document state collector 34 + ]} 35 + *) 36 + 37 + (** Include the standard checker signature. *) 38 + include Checker.S 39 + 40 + (** {1 Creation} *) 41 + 42 + val create_with_registry : ?registry:Element_registry.t -> Message_collector.t -> state 43 + (** [create_with_registry ?registry collector] creates a content checker with an 44 + optional custom element registry. 45 + 46 + If no registry is provided, uses {!Element_registry.default}. 47 + 48 + @param registry Custom element registry (defaults to standard HTML5 elements) 49 + @param collector Message collector for validation messages *) 50 + 51 + (** {1 First-Class Module} *) 52 + 53 + val checker : Checker.t 54 + (** [checker] is the content checker packaged as a first-class module. 55 + 56 + This allows the content checker to be used in checker registries and 57 + other contexts that work with heterogeneous checker collections. *)
+60
lib/html5_checker/content_model/content_model.ml
··· 1 + type t = 2 + | Nothing 3 + | Text 4 + | Transparent 5 + | Categories of Content_category.t list 6 + | Elements of string list 7 + | Mixed of Content_category.t list 8 + | One_or_more of t 9 + | Zero_or_more of t 10 + | Optional of t 11 + | Sequence of t list 12 + | Choice of t list 13 + | Except of t * Content_category.t list 14 + 15 + let rec pp fmt = function 16 + | Nothing -> Format.fprintf fmt "Nothing" 17 + | Text -> Format.fprintf fmt "Text" 18 + | Transparent -> Format.fprintf fmt "Transparent" 19 + | Categories cats -> 20 + Format.fprintf fmt "Categories [%a]" 21 + (Format.pp_print_list 22 + ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") 23 + Content_category.pp) 24 + cats 25 + | Elements elems -> 26 + Format.fprintf fmt "Elements [%a]" 27 + (Format.pp_print_list 28 + ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") 29 + Format.pp_print_string) 30 + elems 31 + | Mixed cats -> 32 + Format.fprintf fmt "Mixed [%a]" 33 + (Format.pp_print_list 34 + ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") 35 + Content_category.pp) 36 + cats 37 + | One_or_more t -> Format.fprintf fmt "One_or_more (%a)" pp t 38 + | Zero_or_more t -> Format.fprintf fmt "Zero_or_more (%a)" pp t 39 + | Optional t -> Format.fprintf fmt "Optional (%a)" pp t 40 + | Sequence ts -> 41 + Format.fprintf fmt "Sequence [%a]" 42 + (Format.pp_print_list ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") pp) 43 + ts 44 + | Choice ts -> 45 + Format.fprintf fmt "Choice [%a]" 46 + (Format.pp_print_list ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") pp) 47 + ts 48 + | Except (t, cats) -> 49 + Format.fprintf fmt "Except (%a, [%a])" pp t 50 + (Format.pp_print_list 51 + ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") 52 + Content_category.pp) 53 + cats 54 + 55 + let to_string t = 56 + let buf = Buffer.create 256 in 57 + let fmt = Format.formatter_of_buffer buf in 58 + pp fmt t; 59 + Format.pp_print_flush fmt (); 60 + Buffer.contents buf
+21
lib/html5_checker/content_model/content_model.mli
··· 1 + (** HTML5 element content models. 2 + 3 + Defines what children an element can contain. *) 4 + 5 + type t = 6 + | Nothing (** No children allowed (void elements) *) 7 + | Text (** Text only (no elements) *) 8 + | Transparent (** Inherits parent's content model *) 9 + | Categories of Content_category.t list (** Elements from categories *) 10 + | Elements of string list (** Specific elements only *) 11 + | Mixed of Content_category.t list (** Text + elements from categories *) 12 + | One_or_more of t (** At least one child matching *) 13 + | Zero_or_more of t (** Any number of children matching *) 14 + | Optional of t (** Zero or one child matching *) 15 + | Sequence of t list (** Ordered sequence *) 16 + | Choice of t list (** Any one of *) 17 + | Except of t * Content_category.t list (** t except categories *) 18 + 19 + val pp : Format.formatter -> t -> unit 20 + 21 + val to_string : t -> string
+43
lib/html5_checker/content_model/element_registry.ml
··· 1 + type t = (string, Element_spec.t) Hashtbl.t 2 + 3 + let create () = Hashtbl.create 128 4 + 5 + let register registry spec = 6 + let name = String.lowercase_ascii spec.Element_spec.name in 7 + Hashtbl.replace registry name spec 8 + 9 + let get registry name = 10 + let name = String.lowercase_ascii name in 11 + Hashtbl.find_opt registry name 12 + 13 + let list_names registry = 14 + Hashtbl.to_seq_keys registry 15 + |> List.of_seq 16 + |> List.sort String.compare 17 + 18 + let all registry = 19 + Hashtbl.to_seq_values registry 20 + |> List.of_seq 21 + 22 + let default () = 23 + let registry = create () in 24 + 25 + (* Register document structure elements *) 26 + List.iter (register registry) Elements_document.all; 27 + 28 + (* Register text-level elements *) 29 + List.iter (register registry) Elements_text.all; 30 + 31 + (* Register form elements *) 32 + List.iter (register registry) Elements_form.all; 33 + 34 + (* Register embedded content elements *) 35 + List.iter (register registry) Elements_embedded.all; 36 + 37 + (* Register table elements *) 38 + List.iter (register registry) Elements_table.all; 39 + 40 + (* Register interactive elements *) 41 + List.iter (register registry) Elements_interactive.all; 42 + 43 + registry
+43
lib/html5_checker/content_model/element_registry.mli
··· 1 + (** Registry for HTML5 element specifications. 2 + 3 + Provides fast lookup of element specs by name. *) 4 + 5 + (** The type of an element registry. *) 6 + type t 7 + 8 + (** {1 Creation and Modification} *) 9 + 10 + val create : unit -> t 11 + (** [create ()] creates a new empty element registry. *) 12 + 13 + val register : t -> Element_spec.t -> unit 14 + (** [register registry spec] adds an element specification to the registry. 15 + 16 + If an element with the same name already exists, it is replaced. *) 17 + 18 + (** {1 Lookup} *) 19 + 20 + val get : t -> string -> Element_spec.t option 21 + (** [get registry name] looks up an element specification by tag name. 22 + 23 + Returns [None] if the element is not registered. Tag names are 24 + case-insensitive. *) 25 + 26 + val list_names : t -> string list 27 + (** [list_names registry] returns a sorted list of all registered element names. *) 28 + 29 + val all : t -> Element_spec.t list 30 + (** [all registry] returns all registered element specifications. *) 31 + 32 + (** {1 Default Registry} *) 33 + 34 + val default : unit -> t 35 + (** [default ()] creates a registry pre-populated with all standard HTML5 elements. 36 + 37 + The registry includes elements from: 38 + - {!Elements_document} - Document structure and sectioning 39 + - {!Elements_text} - Text-level semantics 40 + - {!Elements_form} - Forms and input controls 41 + - {!Elements_embedded} - Embedded content 42 + - {!Elements_table} - Tables 43 + - {!Elements_interactive} - Interactive elements *)
+62
lib/html5_checker/content_model/element_spec.ml
··· 1 + type t = { 2 + name : string; 3 + void : bool; 4 + categories : Content_category.t list; 5 + content_model : Content_model.t; 6 + permitted_parents : string list option; 7 + prohibited_ancestors : string list; 8 + tag_omission : bool; 9 + attrs : Attr_spec.t list; 10 + implicit_aria_role : string option; 11 + } 12 + 13 + let make ~name ?(void = false) ?(categories = []) 14 + ?(content_model = Content_model.Nothing) ?permitted_parents 15 + ?(prohibited_ancestors = []) ?(tag_omission = false) ?(attrs = []) 16 + ?implicit_aria_role () = 17 + { 18 + name; 19 + void; 20 + categories; 21 + content_model; 22 + permitted_parents; 23 + prohibited_ancestors; 24 + tag_omission; 25 + attrs; 26 + implicit_aria_role; 27 + } 28 + 29 + let is_void t = t.void 30 + 31 + let has_category t category = List.mem category t.categories 32 + 33 + let pp fmt t = 34 + Format.fprintf fmt "@[<v 2>{ name = %S;@ void = %b;@ " t.name t.void; 35 + Format.fprintf fmt "categories = [%a];@ " 36 + (Format.pp_print_list 37 + ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") 38 + Content_category.pp) 39 + t.categories; 40 + Format.fprintf fmt "content_model = %a;@ " Content_model.pp t.content_model; 41 + (match t.permitted_parents with 42 + | None -> Format.fprintf fmt "permitted_parents = None;@ " 43 + | Some parents -> 44 + Format.fprintf fmt "permitted_parents = Some [%a];@ " 45 + (Format.pp_print_list 46 + ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") 47 + (fun fmt s -> Format.fprintf fmt "%S" s)) 48 + parents); 49 + Format.fprintf fmt "prohibited_ancestors = [%a];@ " 50 + (Format.pp_print_list 51 + ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") 52 + (fun fmt s -> Format.fprintf fmt "%S" s)) 53 + t.prohibited_ancestors; 54 + Format.fprintf fmt "tag_omission = %b;@ " t.tag_omission; 55 + Format.fprintf fmt "attrs = [%a];@ " 56 + (Format.pp_print_list 57 + ~pp_sep:(fun fmt () -> Format.fprintf fmt ";@ ") 58 + Attr_spec.pp) 59 + t.attrs; 60 + (match t.implicit_aria_role with 61 + | None -> Format.fprintf fmt "implicit_aria_role = None }@]" 62 + | Some role -> Format.fprintf fmt "implicit_aria_role = Some %S }@]" role)
+35
lib/html5_checker/content_model/element_spec.mli
··· 1 + (** HTML5 element specifications. 2 + 3 + Defines the complete specification for an HTML5 element including content 4 + model, categories, and attributes. *) 5 + 6 + type t = { 7 + name : string; 8 + void : bool; (** Is void element? *) 9 + categories : Content_category.t list; (** What categories it belongs to *) 10 + content_model : Content_model.t; (** What children are allowed *) 11 + permitted_parents : string list option; (** Explicit parent restrictions *) 12 + prohibited_ancestors : string list; (** Cannot appear inside these *) 13 + tag_omission : bool; (** Can end tag be omitted? *) 14 + attrs : Attr_spec.t list; (** Element-specific attributes *) 15 + implicit_aria_role : string option; (** Default ARIA role *) 16 + } 17 + 18 + val make : 19 + name:string -> 20 + ?void:bool -> 21 + ?categories:Content_category.t list -> 22 + ?content_model:Content_model.t -> 23 + ?permitted_parents:string list -> 24 + ?prohibited_ancestors:string list -> 25 + ?tag_omission:bool -> 26 + ?attrs:Attr_spec.t list -> 27 + ?implicit_aria_role:string -> 28 + unit -> 29 + t 30 + 31 + val is_void : t -> bool 32 + 33 + val has_category : t -> Content_category.t -> bool 34 + 35 + val pp : Format.formatter -> t -> unit
+289
lib/html5_checker/content_model/elements_document.ml
··· 1 + (** Document structure elements *) 2 + 3 + let html = 4 + Element_spec.make ~name:"html" 5 + ~content_model: 6 + (Content_model.Sequence 7 + [ 8 + Content_model.Optional (Content_model.Elements [ "head" ]); 9 + Content_model.Optional (Content_model.Elements [ "body" ]); 10 + ]) 11 + ~implicit_aria_role:"document" () 12 + 13 + let head = 14 + Element_spec.make ~name:"head" ~categories:[] 15 + ~content_model: 16 + (Content_model.Zero_or_more (Content_model.Categories [ Metadata ])) 17 + () 18 + 19 + let body = 20 + Element_spec.make ~name:"body" ~categories:[ Sectioning ] 21 + ~content_model:(Content_model.Categories [ Flow ]) () 22 + 23 + let title = 24 + Element_spec.make ~name:"title" ~categories:[ Metadata ] 25 + ~content_model:Content_model.Text () 26 + 27 + let base = 28 + Element_spec.make ~name:"base" ~void:true ~categories:[ Metadata ] 29 + ~content_model:Content_model.Nothing () 30 + 31 + let link = 32 + Element_spec.make ~name:"link" ~void:true ~categories:[ Metadata ] 33 + ~content_model:Content_model.Nothing () 34 + 35 + let meta = 36 + Element_spec.make ~name:"meta" ~void:true ~categories:[ Metadata ] 37 + ~content_model:Content_model.Nothing () 38 + 39 + let style = 40 + Element_spec.make ~name:"style" ~categories:[ Metadata ] 41 + ~content_model:Content_model.Text () 42 + 43 + (** Sectioning elements *) 44 + 45 + let article = 46 + Element_spec.make ~name:"article" ~categories:[ Flow; Sectioning; Palpable ] 47 + ~content_model:(Content_model.Categories [ Flow ]) () 48 + 49 + let section = 50 + Element_spec.make ~name:"section" ~categories:[ Flow; Sectioning; Palpable ] 51 + ~content_model:(Content_model.Categories [ Flow ]) () 52 + 53 + let nav = 54 + Element_spec.make ~name:"nav" ~categories:[ Flow; Sectioning; Palpable ] 55 + ~content_model:(Content_model.Categories [ Flow ]) 56 + ~implicit_aria_role:"navigation" () 57 + 58 + let aside = 59 + Element_spec.make ~name:"aside" ~categories:[ Flow; Sectioning; Palpable ] 60 + ~content_model:(Content_model.Categories [ Flow ]) 61 + ~implicit_aria_role:"complementary" () 62 + 63 + let h1 = 64 + Element_spec.make ~name:"h1" ~categories:[ Flow; Heading; Palpable ] 65 + ~content_model:(Content_model.Categories [ Phrasing ]) 66 + ~implicit_aria_role:"heading" () 67 + 68 + let h2 = 69 + Element_spec.make ~name:"h2" ~categories:[ Flow; Heading; Palpable ] 70 + ~content_model:(Content_model.Categories [ Phrasing ]) 71 + ~implicit_aria_role:"heading" () 72 + 73 + let h3 = 74 + Element_spec.make ~name:"h3" ~categories:[ Flow; Heading; Palpable ] 75 + ~content_model:(Content_model.Categories [ Phrasing ]) 76 + ~implicit_aria_role:"heading" () 77 + 78 + let h4 = 79 + Element_spec.make ~name:"h4" ~categories:[ Flow; Heading; Palpable ] 80 + ~content_model:(Content_model.Categories [ Phrasing ]) 81 + ~implicit_aria_role:"heading" () 82 + 83 + let h5 = 84 + Element_spec.make ~name:"h5" ~categories:[ Flow; Heading; Palpable ] 85 + ~content_model:(Content_model.Categories [ Phrasing ]) 86 + ~implicit_aria_role:"heading" () 87 + 88 + let h6 = 89 + Element_spec.make ~name:"h6" ~categories:[ Flow; Heading; Palpable ] 90 + ~content_model:(Content_model.Categories [ Phrasing ]) 91 + ~implicit_aria_role:"heading" () 92 + 93 + let hgroup = 94 + Element_spec.make ~name:"hgroup" ~categories:[ Flow; Heading; Palpable ] 95 + ~content_model: 96 + (Content_model.Sequence 97 + [ 98 + Content_model.Zero_or_more 99 + (Content_model.Choice 100 + [ 101 + Content_model.Elements [ "p" ]; 102 + Content_model.Categories [ Heading ]; 103 + ]); 104 + ]) 105 + ~implicit_aria_role:"group" () 106 + 107 + let header = 108 + Element_spec.make ~name:"header" ~categories:[ Flow; Palpable ] 109 + ~content_model: 110 + (Content_model.Except 111 + ( Content_model.Categories [ Flow ], 112 + [] (* no header/footer descendants *) )) 113 + ~prohibited_ancestors:[ "header"; "footer" ] ~implicit_aria_role:"banner" () 114 + 115 + let footer = 116 + Element_spec.make ~name:"footer" ~categories:[ Flow; Palpable ] 117 + ~content_model: 118 + (Content_model.Except 119 + ( Content_model.Categories [ Flow ], 120 + [] (* no header/footer descendants *) )) 121 + ~prohibited_ancestors:[ "header"; "footer" ] 122 + ~implicit_aria_role:"contentinfo" () 123 + 124 + let address = 125 + Element_spec.make ~name:"address" ~categories:[ Flow; Palpable ] 126 + ~content_model: 127 + (Content_model.Except 128 + ( Content_model.Categories [ Flow ], 129 + [ Heading; Sectioning ] (* no heading/sectioning content *) )) 130 + ~prohibited_ancestors:[ "address" ] () 131 + 132 + let main = 133 + Element_spec.make ~name:"main" ~categories:[ Flow; Palpable ] 134 + ~content_model:(Content_model.Categories [ Flow ]) 135 + ~implicit_aria_role:"main" () 136 + 137 + (** Grouping elements *) 138 + 139 + let p = 140 + Element_spec.make ~name:"p" ~categories:[ Flow; Palpable ] 141 + ~content_model:(Content_model.Categories [ Phrasing ]) () 142 + 143 + let hr = 144 + Element_spec.make ~name:"hr" ~void:true ~categories:[ Flow ] 145 + ~content_model:Content_model.Nothing ~implicit_aria_role:"separator" () 146 + 147 + let pre = 148 + Element_spec.make ~name:"pre" ~categories:[ Flow; Palpable ] 149 + ~content_model:(Content_model.Categories [ Phrasing ]) () 150 + 151 + let blockquote = 152 + Element_spec.make ~name:"blockquote" ~categories:[ Flow; Palpable ] 153 + ~content_model:(Content_model.Categories [ Flow ]) () 154 + 155 + let ol = 156 + Element_spec.make ~name:"ol" ~categories:[ Flow; Palpable ] 157 + ~content_model:(Content_model.Zero_or_more (Content_model.Elements [ "li" ])) 158 + ~implicit_aria_role:"list" () 159 + 160 + let ul = 161 + Element_spec.make ~name:"ul" ~categories:[ Flow; Palpable ] 162 + ~content_model:(Content_model.Zero_or_more (Content_model.Elements [ "li" ])) 163 + ~implicit_aria_role:"list" () 164 + 165 + let menu = 166 + Element_spec.make ~name:"menu" ~categories:[ Flow; Palpable ] 167 + ~content_model: 168 + (Content_model.Zero_or_more 169 + (Content_model.Choice 170 + [ 171 + Content_model.Elements [ "li" ]; 172 + Content_model.Elements [ "script" ]; 173 + Content_model.Elements [ "template" ]; 174 + ])) 175 + ~implicit_aria_role:"list" () 176 + 177 + let li = 178 + Element_spec.make ~name:"li" ~categories:[] 179 + ~content_model:(Content_model.Categories [ Flow ]) 180 + ~permitted_parents:[ "ol"; "ul"; "menu" ] ~implicit_aria_role:"listitem" () 181 + 182 + let dl = 183 + Element_spec.make ~name:"dl" ~categories:[ Flow; Palpable ] 184 + ~content_model: 185 + (Content_model.Choice 186 + [ 187 + Content_model.Zero_or_more 188 + (Content_model.Sequence 189 + [ 190 + Content_model.One_or_more (Content_model.Elements [ "dt" ]); 191 + Content_model.One_or_more (Content_model.Elements [ "dd" ]); 192 + ]); 193 + Content_model.Zero_or_more 194 + (Content_model.Choice 195 + [ 196 + Content_model.Elements [ "div" ]; 197 + Content_model.Elements [ "script" ]; 198 + Content_model.Elements [ "template" ]; 199 + ]); 200 + ]) 201 + () 202 + 203 + let dt = 204 + Element_spec.make ~name:"dt" ~categories:[] 205 + ~content_model: 206 + (Content_model.Except 207 + ( Content_model.Categories [ Flow ], 208 + [ Heading; Sectioning ] (* no heading/sectioning *) )) 209 + ~permitted_parents:[ "dl"; "div" ] () 210 + 211 + let dd = 212 + Element_spec.make ~name:"dd" ~categories:[] 213 + ~content_model:(Content_model.Categories [ Flow ]) 214 + ~permitted_parents:[ "dl"; "div" ] () 215 + 216 + let figure = 217 + Element_spec.make ~name:"figure" ~categories:[ Flow; Palpable ] 218 + ~content_model: 219 + (Content_model.Choice 220 + [ 221 + Content_model.Sequence 222 + [ 223 + Content_model.Optional 224 + (Content_model.Elements [ "figcaption" ]); 225 + Content_model.Categories [ Flow ]; 226 + ]; 227 + Content_model.Sequence 228 + [ 229 + Content_model.Categories [ Flow ]; 230 + Content_model.Optional 231 + (Content_model.Elements [ "figcaption" ]); 232 + ]; 233 + ]) 234 + ~implicit_aria_role:"figure" () 235 + 236 + let figcaption = 237 + Element_spec.make ~name:"figcaption" ~categories:[] 238 + ~content_model:(Content_model.Categories [ Flow ]) 239 + ~permitted_parents:[ "figure" ] () 240 + 241 + let div = 242 + Element_spec.make ~name:"div" ~categories:[ Flow; Palpable ] 243 + ~content_model:(Content_model.Categories [ Flow ]) () 244 + 245 + (** Element registry *) 246 + 247 + let all = 248 + [ 249 + (* Document structure *) 250 + html; 251 + head; 252 + body; 253 + title; 254 + base; 255 + link; 256 + meta; 257 + style; 258 + (* Sectioning *) 259 + article; 260 + section; 261 + nav; 262 + aside; 263 + h1; 264 + h2; 265 + h3; 266 + h4; 267 + h5; 268 + h6; 269 + hgroup; 270 + header; 271 + footer; 272 + address; 273 + main; 274 + (* Grouping *) 275 + p; 276 + hr; 277 + pre; 278 + blockquote; 279 + ol; 280 + ul; 281 + menu; 282 + li; 283 + dl; 284 + dt; 285 + dd; 286 + figure; 287 + figcaption; 288 + div; 289 + ]
+147
lib/html5_checker/content_model/elements_document.mli
··· 1 + (** HTML5 structural and document element specifications. 2 + 3 + This module defines element specifications for HTML5 document structure, 4 + sectioning, and grouping elements according to the WHATWG HTML specification. 5 + 6 + @see <https://html.spec.whatwg.org/multipage/> WHATWG HTML Specification *) 7 + 8 + (** {1 Document structure elements} *) 9 + 10 + val html : Element_spec.t 11 + (** The [html] element represents the root of an HTML document. *) 12 + 13 + val head : Element_spec.t 14 + (** The [head] element represents a collection of metadata for the document. *) 15 + 16 + val body : Element_spec.t 17 + (** The [body] element represents the contents of the document. *) 18 + 19 + val title : Element_spec.t 20 + (** The [title] element represents the document's title or name. *) 21 + 22 + val base : Element_spec.t 23 + (** The [base] element specifies the document base URL and/or default browsing 24 + context for navigation. *) 25 + 26 + val link : Element_spec.t 27 + (** The [link] element specifies relationships between the current document and 28 + external resources. *) 29 + 30 + val meta : Element_spec.t 31 + (** The [meta] element represents various kinds of metadata that cannot be 32 + expressed using other metadata elements. *) 33 + 34 + val style : Element_spec.t 35 + (** The [style] element allows authors to embed CSS style sheets in their documents. *) 36 + 37 + (** {1 Sectioning elements} *) 38 + 39 + val article : Element_spec.t 40 + (** The [article] element represents a complete, or self-contained, composition 41 + in a document, page, application, or site. *) 42 + 43 + val section : Element_spec.t 44 + (** The [section] element represents a generic section of a document or application. *) 45 + 46 + val nav : Element_spec.t 47 + (** The [nav] element represents a section of a page that links to other pages 48 + or to parts within the page. *) 49 + 50 + val aside : Element_spec.t 51 + (** The [aside] element represents a section of a page that consists of content 52 + that is tangentially related to the content around it. *) 53 + 54 + val h1 : Element_spec.t 55 + (** The [h1] element represents a heading at level 1. *) 56 + 57 + val h2 : Element_spec.t 58 + (** The [h2] element represents a heading at level 2. *) 59 + 60 + val h3 : Element_spec.t 61 + (** The [h3] element represents a heading at level 3. *) 62 + 63 + val h4 : Element_spec.t 64 + (** The [h4] element represents a heading at level 4. *) 65 + 66 + val h5 : Element_spec.t 67 + (** The [h5] element represents a heading at level 5. *) 68 + 69 + val h6 : Element_spec.t 70 + (** The [h6] element represents a heading at level 6. *) 71 + 72 + val hgroup : Element_spec.t 73 + (** The [hgroup] element represents a heading and related content, such as 74 + subheadings, an alternative title, or a tagline. *) 75 + 76 + val header : Element_spec.t 77 + (** The [header] element represents introductory content for its nearest ancestor 78 + sectioning content or sectioning root element. *) 79 + 80 + val footer : Element_spec.t 81 + (** The [footer] element represents a footer for its nearest ancestor sectioning 82 + content or sectioning root element. *) 83 + 84 + val address : Element_spec.t 85 + (** The [address] element represents contact information for its nearest [article] 86 + or [body] element ancestor. *) 87 + 88 + val main : Element_spec.t 89 + (** The [main] element represents the dominant contents of the document. *) 90 + 91 + (** {1 Grouping elements} *) 92 + 93 + val p : Element_spec.t 94 + (** The [p] element represents a paragraph. *) 95 + 96 + val hr : Element_spec.t 97 + (** The [hr] element represents a thematic break between paragraph-level elements. *) 98 + 99 + val pre : Element_spec.t 100 + (** The [pre] element represents a block of preformatted text. *) 101 + 102 + val blockquote : Element_spec.t 103 + (** The [blockquote] element represents a section that is quoted from another source. *) 104 + 105 + val ol : Element_spec.t 106 + (** The [ol] element represents a list of items, where the items have been 107 + intentionally ordered. *) 108 + 109 + val ul : Element_spec.t 110 + (** The [ul] element represents a list of items, where the order of the items 111 + is not important. *) 112 + 113 + val menu : Element_spec.t 114 + (** The [menu] element represents a toolbar consisting of its contents, in the 115 + form of an unordered list of items. *) 116 + 117 + val li : Element_spec.t 118 + (** The [li] element represents a list item. *) 119 + 120 + val dl : Element_spec.t 121 + (** The [dl] element represents an association list consisting of zero or more 122 + name-value groups (a description list). *) 123 + 124 + val dt : Element_spec.t 125 + (** The [dt] element represents the term, or name, part of a term-description 126 + group in a description list. *) 127 + 128 + val dd : Element_spec.t 129 + (** The [dd] element represents the description, definition, or value, part of 130 + a term-description group in a description list. *) 131 + 132 + val figure : Element_spec.t 133 + (** The [figure] element represents some flow content, optionally with a caption, 134 + that is self-contained and is typically referenced as a single unit from 135 + the main flow of the document. *) 136 + 137 + val figcaption : Element_spec.t 138 + (** The [figcaption] element represents a caption or legend for the rest of the 139 + contents of the parent [figure] element. *) 140 + 141 + val div : Element_spec.t 142 + (** The [div] element has no special meaning at all. It represents its children. *) 143 + 144 + (** {1 Element registry} *) 145 + 146 + val all : Element_spec.t list 147 + (** [all] contains all element specifications defined in this module. *)
+220
lib/html5_checker/content_model/elements_embedded.ml
··· 1 + open Content_category 2 + open Content_model 3 + open Attr_spec 4 + 5 + let picture = 6 + Element_spec.make ~name:"picture" 7 + ~categories:[ Flow; Phrasing; Embedded; Palpable ] 8 + ~content_model: 9 + (Sequence 10 + [ 11 + Zero_or_more (Elements [ "source" ]); 12 + Elements [ "img" ]; 13 + Zero_or_more (Elements [ "script"; "template" ]); 14 + ]) 15 + ~attrs:[] () 16 + 17 + let source = 18 + Element_spec.make ~name:"source" ~void:true 19 + ~categories:[] 20 + ~content_model:Nothing 21 + ~attrs: 22 + [ 23 + make "type" ~datatype:"mime" (); 24 + make "media" ~datatype:"media_query" (); 25 + make "src" ~datatype:"url" (); 26 + make "srcset" ~datatype:"srcset" (); 27 + make "sizes" (); 28 + make "width" ~datatype:"integer" (); 29 + make "height" ~datatype:"integer" (); 30 + ] 31 + () 32 + 33 + let img = 34 + Element_spec.make ~name:"img" ~void:true 35 + ~categories:[ Flow; Phrasing; Embedded; Palpable; Interactive ] 36 + ~content_model:Nothing 37 + ~attrs: 38 + [ 39 + make "src" ~required:true ~datatype:"url" (); 40 + make "alt" (); 41 + make "srcset" ~datatype:"srcset" (); 42 + make "sizes" (); 43 + make "crossorigin" ~datatype:"crossorigin" (); 44 + make "usemap" ~datatype:"hash" (); 45 + make "ismap" ~datatype:"boolean" (); 46 + make "width" ~datatype:"integer" (); 47 + make "height" ~datatype:"integer" (); 48 + make "referrerpolicy" ~datatype:"referrer" (); 49 + make "decoding" ~datatype:"decoding" (); 50 + make "loading" ~datatype:"loading" (); 51 + make "fetchpriority" ~datatype:"fetchpriority" (); 52 + ] 53 + ~implicit_aria_role:"img" () 54 + 55 + let iframe = 56 + Element_spec.make ~name:"iframe" 57 + ~categories:[ Flow; Phrasing; Embedded; Interactive; Palpable ] 58 + ~content_model:Nothing 59 + ~attrs: 60 + [ 61 + make "src" ~datatype:"url" (); 62 + make "srcdoc" (); 63 + make "name" ~datatype:"target" (); 64 + make "sandbox" ~datatype:"sandbox" (); 65 + make "allow" (); 66 + make "allowfullscreen" ~datatype:"boolean" (); 67 + make "width" ~datatype:"integer" (); 68 + make "height" ~datatype:"integer" (); 69 + make "referrerpolicy" ~datatype:"referrer" (); 70 + make "loading" ~datatype:"loading" (); 71 + ] 72 + () 73 + 74 + let embed = 75 + Element_spec.make ~name:"embed" ~void:true 76 + ~categories:[ Flow; Phrasing; Embedded; Interactive; Palpable ] 77 + ~content_model:Nothing 78 + ~attrs: 79 + [ 80 + make "src" ~required:true ~datatype:"url" (); 81 + make "type" ~datatype:"mime" (); 82 + make "width" ~datatype:"integer" (); 83 + make "height" ~datatype:"integer" (); 84 + ] 85 + () 86 + 87 + let object_ = 88 + Element_spec.make ~name:"object" 89 + ~categories:[ Flow; Phrasing; Embedded; Palpable; Interactive ] 90 + ~content_model:(Mixed [ Flow ]) 91 + ~attrs: 92 + [ 93 + make "data" ~datatype:"url" (); 94 + make "type" ~datatype:"mime" (); 95 + make "name" ~datatype:"target" (); 96 + make "form" ~datatype:"id" (); 97 + make "width" ~datatype:"integer" (); 98 + make "height" ~datatype:"integer" (); 99 + ] 100 + () 101 + 102 + let param = 103 + Element_spec.make ~name:"param" ~void:true 104 + ~categories:[] 105 + ~content_model:Nothing 106 + ~attrs:[ make "name" ~required:true (); make "value" ~required:true () ] 107 + () 108 + 109 + let video = 110 + Element_spec.make ~name:"video" 111 + ~categories:[ Flow; Phrasing; Embedded; Palpable; Interactive ] 112 + ~content_model: 113 + (Choice 114 + [ 115 + Sequence 116 + [ 117 + Zero_or_more (Elements [ "source" ]); 118 + Zero_or_more (Elements [ "track" ]); 119 + Zero_or_more Transparent; 120 + ]; 121 + Sequence [ Zero_or_more Transparent; Zero_or_more (Elements [ "track" ]) ]; 122 + ]) 123 + ~prohibited_ancestors:[ "audio"; "video" ] 124 + ~attrs: 125 + [ 126 + make "src" ~datatype:"url" (); 127 + make "crossorigin" ~datatype:"crossorigin" (); 128 + make "poster" ~datatype:"url" (); 129 + make "preload" ~datatype:"preload" (); 130 + make "autoplay" ~datatype:"boolean" (); 131 + make "playsinline" ~datatype:"boolean" (); 132 + make "loop" ~datatype:"boolean" (); 133 + make "muted" ~datatype:"boolean" (); 134 + make "controls" ~datatype:"boolean" (); 135 + make "width" ~datatype:"integer" (); 136 + make "height" ~datatype:"integer" (); 137 + ] 138 + () 139 + 140 + let audio = 141 + Element_spec.make ~name:"audio" 142 + ~categories:[ Flow; Phrasing; Embedded; Palpable; Interactive ] 143 + ~content_model: 144 + (Choice 145 + [ 146 + Sequence 147 + [ 148 + Zero_or_more (Elements [ "source" ]); 149 + Zero_or_more (Elements [ "track" ]); 150 + Zero_or_more Transparent; 151 + ]; 152 + Sequence [ Zero_or_more Transparent; Zero_or_more (Elements [ "track" ]) ]; 153 + ]) 154 + ~prohibited_ancestors:[ "audio"; "video" ] 155 + ~attrs: 156 + [ 157 + make "src" ~datatype:"url" (); 158 + make "crossorigin" ~datatype:"crossorigin" (); 159 + make "preload" ~datatype:"preload" (); 160 + make "autoplay" ~datatype:"boolean" (); 161 + make "loop" ~datatype:"boolean" (); 162 + make "muted" ~datatype:"boolean" (); 163 + make "controls" ~datatype:"boolean" (); 164 + ] 165 + () 166 + 167 + let track = 168 + Element_spec.make ~name:"track" ~void:true 169 + ~categories:[] 170 + ~content_model:Nothing 171 + ~permitted_parents:[ "audio"; "video" ] 172 + ~attrs: 173 + [ 174 + make "kind" ~datatype:"kind" (); 175 + make "src" ~required:true ~datatype:"url" (); 176 + make "srclang" ~datatype:"language" (); 177 + make "label" (); 178 + make "default" ~datatype:"boolean" (); 179 + ] 180 + () 181 + 182 + let map = 183 + Element_spec.make ~name:"map" 184 + ~categories:[ Flow; Phrasing; Palpable ] 185 + ~content_model:Transparent 186 + ~attrs:[ make "name" ~required:true () ] () 187 + 188 + let area = 189 + Element_spec.make ~name:"area" ~void:true 190 + ~categories:[ Flow; Phrasing ] 191 + ~content_model:Nothing 192 + ~attrs: 193 + [ 194 + make "alt" (); 195 + make "coords" ~datatype:"coords" (); 196 + make "shape" ~datatype:"shape" (); 197 + make "href" ~datatype:"url" (); 198 + make "target" ~datatype:"target" (); 199 + make "download" (); 200 + make "ping" (); 201 + make "rel" ~datatype:"link_type" (); 202 + make "referrerpolicy" ~datatype:"referrer" (); 203 + ] 204 + ~implicit_aria_role:"link" () 205 + 206 + let all = 207 + [ 208 + picture; 209 + source; 210 + img; 211 + iframe; 212 + embed; 213 + object_; 214 + param; 215 + video; 216 + audio; 217 + track; 218 + map; 219 + area; 220 + ]
+54
lib/html5_checker/content_model/elements_embedded.mli
··· 1 + (** HTML5 embedded content element specifications. 2 + 3 + Embedded content elements import resources into the document. 4 + See https://html.spec.whatwg.org/multipage/embedded-content.html *) 5 + 6 + val picture : Element_spec.t 7 + (** The picture element contains zero or more source elements followed by 8 + one img element to offer alternative versions of an image for different 9 + display scenarios. *) 10 + 11 + val source : Element_spec.t 12 + (** The source element specifies multiple media resources for picture, audio, 13 + and video elements. It is a void element. *) 14 + 15 + val img : Element_spec.t 16 + (** The img element represents an image. It is a void element. *) 17 + 18 + val iframe : Element_spec.t 19 + (** The iframe element represents a nested browsing context. *) 20 + 21 + val embed : Element_spec.t 22 + (** The embed element provides an integration point for an external application 23 + or interactive content. It is a void element. *) 24 + 25 + val object_ : Element_spec.t 26 + (** The object element can represent an external resource, which is treated as 27 + an image, a nested browsing context, or a plugin. *) 28 + 29 + val param : Element_spec.t 30 + (** The param element defines parameters for plugins invoked by object elements. 31 + Deprecated in favor of using data attributes. It is a void element. *) 32 + 33 + val video : Element_spec.t 34 + (** The video element is used for playing videos or movies, and audio files 35 + with captions. *) 36 + 37 + val audio : Element_spec.t 38 + (** The audio element represents a sound or audio stream. *) 39 + 40 + val track : Element_spec.t 41 + (** The track element allows authors to specify explicit external timed text 42 + tracks for media elements. It is a void element. *) 43 + 44 + val map : Element_spec.t 45 + (** The map element, in conjunction with img and area elements, defines an 46 + image map. *) 47 + 48 + val area : Element_spec.t 49 + (** The area element represents either a hyperlink with some text and a 50 + corresponding area on an image map, or a dead area on an image map. 51 + It is a void element. *) 52 + 53 + val all : Element_spec.t list 54 + (** List of all embedded content element specifications. *)
+237
lib/html5_checker/content_model/elements_form.ml
··· 1 + open Content_category 2 + open Content_model 3 + 4 + let form = 5 + Element_spec.make ~name:"form" 6 + ~categories:[Flow; Palpable] 7 + ~content_model:(Except (Categories [Flow], [])) 8 + ~attrs:[ 9 + Attr_spec.make "accept-charset" ~datatype:"string" (); 10 + Attr_spec.make "action" ~datatype:"url" (); 11 + Attr_spec.make "autocomplete" ~datatype:"autocomplete" (); 12 + Attr_spec.make "enctype" ~datatype:"encoding" (); 13 + Attr_spec.make "method" ~datatype:"method" (); 14 + Attr_spec.make "name" ~datatype:"string" (); 15 + Attr_spec.make "novalidate" ~datatype:"boolean" (); 16 + Attr_spec.make "target" ~datatype:"target" (); 17 + Attr_spec.make "rel" ~datatype:"relationship" (); 18 + ] 19 + ~prohibited_ancestors:["form"] 20 + () 21 + 22 + let label = 23 + Element_spec.make ~name:"label" 24 + ~categories:[Flow; Phrasing; Interactive; Palpable] 25 + ~content_model:(Categories [Phrasing]) 26 + ~attrs:[ 27 + Attr_spec.make "for" ~datatype:"idref" (); 28 + ] 29 + () 30 + 31 + let input = 32 + Element_spec.make ~name:"input" 33 + ~void:true 34 + ~categories:[Flow; Phrasing; Interactive; Palpable] 35 + ~content_model:Nothing 36 + ~attrs:[ 37 + Attr_spec.make "accept" ~datatype:"string" (); 38 + Attr_spec.make "alt" ~datatype:"string" (); 39 + Attr_spec.make "autocomplete" ~datatype:"autocomplete" (); 40 + Attr_spec.make "checked" ~datatype:"boolean" (); 41 + Attr_spec.make "dirname" ~datatype:"directionality" (); 42 + Attr_spec.make "disabled" ~datatype:"boolean" (); 43 + Attr_spec.make "form" ~datatype:"idref" (); 44 + Attr_spec.make "formaction" ~datatype:"url" (); 45 + Attr_spec.make "formenctype" ~datatype:"encoding" (); 46 + Attr_spec.make "formmethod" ~datatype:"method" (); 47 + Attr_spec.make "formnovalidate" ~datatype:"boolean" (); 48 + Attr_spec.make "formtarget" ~datatype:"target" (); 49 + Attr_spec.make "height" ~datatype:"integer" (); 50 + Attr_spec.make "list" ~datatype:"idref" (); 51 + Attr_spec.make "max" ~datatype:"string" (); 52 + Attr_spec.make "maxlength" ~datatype:"integer" (); 53 + Attr_spec.make "min" ~datatype:"string" (); 54 + Attr_spec.make "minlength" ~datatype:"integer" (); 55 + Attr_spec.make "multiple" ~datatype:"boolean" (); 56 + Attr_spec.make "name" ~datatype:"string" (); 57 + Attr_spec.make "pattern" ~datatype:"string" (); 58 + Attr_spec.make "placeholder" ~datatype:"string" (); 59 + Attr_spec.make "popovertarget" ~datatype:"string" (); 60 + Attr_spec.make "popovertargetaction" ~datatype:"popovertargetaction" (); 61 + Attr_spec.make "readonly" ~datatype:"boolean" (); 62 + Attr_spec.make "required" ~datatype:"boolean" (); 63 + Attr_spec.make "size" ~datatype:"integer" (); 64 + Attr_spec.make "src" ~datatype:"url" (); 65 + Attr_spec.make "step" ~datatype:"string" (); 66 + Attr_spec.make "type" ~enum_values:[ 67 + "hidden"; "text"; "search"; "tel"; "url"; "email"; "password"; 68 + "date"; "month"; "week"; "time"; "datetime-local"; "number"; 69 + "range"; "color"; "checkbox"; "radio"; "file"; "submit"; "image"; 70 + "reset"; "button" 71 + ] (); 72 + Attr_spec.make "value" ~datatype:"string" (); 73 + Attr_spec.make "width" ~datatype:"integer" (); 74 + ] 75 + () 76 + 77 + let button = 78 + Element_spec.make ~name:"button" 79 + ~categories:[Flow; Phrasing; Interactive; Palpable] 80 + ~content_model:(Except (Categories [Phrasing], [Interactive])) 81 + ~attrs:[ 82 + Attr_spec.make "disabled" ~datatype:"boolean" (); 83 + Attr_spec.make "form" ~datatype:"idref" (); 84 + Attr_spec.make "formaction" ~datatype:"url" (); 85 + Attr_spec.make "formenctype" ~datatype:"encoding" (); 86 + Attr_spec.make "formmethod" ~datatype:"method" (); 87 + Attr_spec.make "formnovalidate" ~datatype:"boolean" (); 88 + Attr_spec.make "formtarget" ~datatype:"target" (); 89 + Attr_spec.make "name" ~datatype:"string" (); 90 + Attr_spec.make "popovertarget" ~datatype:"string" (); 91 + Attr_spec.make "popovertargetaction" ~datatype:"popovertargetaction" (); 92 + Attr_spec.make "type" ~enum_values:["submit"; "reset"; "button"] (); 93 + Attr_spec.make "value" ~datatype:"string" (); 94 + ] 95 + () 96 + 97 + let select = 98 + Element_spec.make ~name:"select" 99 + ~categories:[Flow; Phrasing; Interactive; Palpable] 100 + ~content_model:(Elements ["option"; "optgroup"; "script"; "template"]) 101 + ~attrs:[ 102 + Attr_spec.make "autocomplete" ~datatype:"autocomplete" (); 103 + Attr_spec.make "disabled" ~datatype:"boolean" (); 104 + Attr_spec.make "form" ~datatype:"idref" (); 105 + Attr_spec.make "multiple" ~datatype:"boolean" (); 106 + Attr_spec.make "name" ~datatype:"string" (); 107 + Attr_spec.make "required" ~datatype:"boolean" (); 108 + Attr_spec.make "size" ~datatype:"integer" (); 109 + ] 110 + ~implicit_aria_role:"combobox" 111 + () 112 + 113 + let datalist = 114 + Element_spec.make ~name:"datalist" 115 + ~categories:[Flow; Phrasing] 116 + ~content_model:(Choice [ 117 + Categories [Phrasing]; 118 + Elements ["option"; "script"; "template"] 119 + ]) 120 + ~implicit_aria_role:"listbox" 121 + () 122 + 123 + let optgroup = 124 + Element_spec.make ~name:"optgroup" 125 + ~categories:[] 126 + ~content_model:(Elements ["option"; "script"; "template"]) 127 + ~attrs:[ 128 + Attr_spec.make "disabled" ~datatype:"boolean" (); 129 + Attr_spec.make "label" ~required:true ~datatype:"string" (); 130 + ] 131 + ~permitted_parents:["select"] 132 + ~implicit_aria_role:"group" 133 + () 134 + 135 + let option = 136 + Element_spec.make ~name:"option" 137 + ~categories:[] 138 + ~content_model:Text 139 + ~attrs:[ 140 + Attr_spec.make "disabled" ~datatype:"boolean" (); 141 + Attr_spec.make "label" ~datatype:"string" (); 142 + Attr_spec.make "selected" ~datatype:"boolean" (); 143 + Attr_spec.make "value" ~datatype:"string" (); 144 + ] 145 + ~permitted_parents:["select"; "datalist"; "optgroup"] 146 + ~implicit_aria_role:"option" 147 + () 148 + 149 + let textarea = 150 + Element_spec.make ~name:"textarea" 151 + ~categories:[Flow; Phrasing; Interactive; Palpable] 152 + ~content_model:Text 153 + ~attrs:[ 154 + Attr_spec.make "autocomplete" ~datatype:"autocomplete" (); 155 + Attr_spec.make "cols" ~datatype:"integer" (); 156 + Attr_spec.make "dirname" ~datatype:"directionality" (); 157 + Attr_spec.make "disabled" ~datatype:"boolean" (); 158 + Attr_spec.make "form" ~datatype:"idref" (); 159 + Attr_spec.make "maxlength" ~datatype:"integer" (); 160 + Attr_spec.make "minlength" ~datatype:"integer" (); 161 + Attr_spec.make "name" ~datatype:"string" (); 162 + Attr_spec.make "placeholder" ~datatype:"string" (); 163 + Attr_spec.make "readonly" ~datatype:"boolean" (); 164 + Attr_spec.make "required" ~datatype:"boolean" (); 165 + Attr_spec.make "rows" ~datatype:"integer" (); 166 + Attr_spec.make "wrap" ~enum_values:["soft"; "hard"] (); 167 + ] 168 + ~implicit_aria_role:"textbox" 169 + () 170 + 171 + let output = 172 + Element_spec.make ~name:"output" 173 + ~categories:[Flow; Phrasing; Palpable] 174 + ~content_model:(Categories [Phrasing]) 175 + ~attrs:[ 176 + Attr_spec.make "for" ~datatype:"idrefs" (); 177 + Attr_spec.make "form" ~datatype:"idref" (); 178 + Attr_spec.make "name" ~datatype:"string" (); 179 + ] 180 + ~implicit_aria_role:"status" 181 + () 182 + 183 + let progress = 184 + Element_spec.make ~name:"progress" 185 + ~categories:[Flow; Phrasing; Palpable] 186 + ~content_model:(Except (Categories [Phrasing], [])) 187 + ~attrs:[ 188 + Attr_spec.make "value" ~datatype:"float" (); 189 + Attr_spec.make "max" ~datatype:"float" (); 190 + ] 191 + ~prohibited_ancestors:["progress"] 192 + () 193 + 194 + let meter = 195 + Element_spec.make ~name:"meter" 196 + ~categories:[Flow; Phrasing; Palpable] 197 + ~content_model:(Except (Categories [Phrasing], [])) 198 + ~attrs:[ 199 + Attr_spec.make "value" ~required:true ~datatype:"float" (); 200 + Attr_spec.make "min" ~datatype:"float" (); 201 + Attr_spec.make "max" ~datatype:"float" (); 202 + Attr_spec.make "low" ~datatype:"float" (); 203 + Attr_spec.make "high" ~datatype:"float" (); 204 + Attr_spec.make "optimum" ~datatype:"float" (); 205 + ] 206 + ~prohibited_ancestors:["meter"] 207 + () 208 + 209 + let fieldset = 210 + Element_spec.make ~name:"fieldset" 211 + ~categories:[Flow; Palpable] 212 + ~content_model:(Sequence [ 213 + Optional (Elements ["legend"]); 214 + Categories [Flow] 215 + ]) 216 + ~attrs:[ 217 + Attr_spec.make "disabled" ~datatype:"boolean" (); 218 + Attr_spec.make "form" ~datatype:"idref" (); 219 + Attr_spec.make "name" ~datatype:"string" (); 220 + ] 221 + ~implicit_aria_role:"group" 222 + () 223 + 224 + let legend = 225 + Element_spec.make ~name:"legend" 226 + ~categories:[] 227 + ~content_model:(Choice [ 228 + Categories [Phrasing]; 229 + Categories [Phrasing; Heading] 230 + ]) 231 + ~permitted_parents:["fieldset"] 232 + () 233 + 234 + let all = [ 235 + form; label; input; button; select; datalist; optgroup; option; 236 + textarea; output; progress; meter; fieldset; legend 237 + ]
+109
lib/html5_checker/content_model/elements_form.mli
··· 1 + (** HTML5 form element specifications. 2 + 3 + Form-associated elements for user input and form submission. 4 + See https://html.spec.whatwg.org/multipage/forms.html *) 5 + 6 + (** {1 Form Container} *) 7 + 8 + val form : Element_spec.t 9 + (** The form element represents a hyperlink that can be manipulated through a 10 + collection of form-associated elements, some of which can represent editable 11 + values that can be submitted to a server for processing. 12 + 13 + Content model: Flow content, but must not contain form element descendants. *) 14 + 15 + (** {1 Form Controls} *) 16 + 17 + val label : Element_spec.t 18 + (** The label element represents a caption in a user interface. The caption can 19 + be associated with a specific form control, known as the label element's 20 + labeled control. 21 + 22 + Content model: Phrasing content, but must not contain descendant label 23 + elements, and must not contain form control descendants other than the 24 + labeled control. *) 25 + 26 + val input : Element_spec.t 27 + (** The input element represents a typed data field, usually with a form control 28 + to allow the user to edit the data. It is a void element. 29 + 30 + Content model: Nothing (void element). 31 + 32 + The type attribute controls the data type (and associated control) of the 33 + element. *) 34 + 35 + val button : Element_spec.t 36 + (** The button element represents a button labeled by its contents. 37 + 38 + Content model: Phrasing content, but must not contain interactive content 39 + descendants. *) 40 + 41 + val select : Element_spec.t 42 + (** The select element represents a control for selecting amongst a set of 43 + options. 44 + 45 + Content model: Zero or more option, optgroup, and script-supporting elements. *) 46 + 47 + val datalist : Element_spec.t 48 + (** The datalist element represents a set of option elements that represent 49 + predefined options for other controls. In the rendering, the datalist element 50 + represents nothing and it, along with its children, should be hidden. 51 + 52 + Content model: Either phrasing content or zero or more option and 53 + script-supporting elements. *) 54 + 55 + val optgroup : Element_spec.t 56 + (** The optgroup element represents a group of option elements with a common 57 + label. 58 + 59 + Content model: Zero or more option and script-supporting elements. *) 60 + 61 + val option : Element_spec.t 62 + (** The option element represents an option in a select element or as part of a 63 + list of suggestions in a datalist element. 64 + 65 + Content model: Text, or empty if label attribute is present. *) 66 + 67 + val textarea : Element_spec.t 68 + (** The textarea element represents a multiline plain text edit control for the 69 + element's raw value. 70 + 71 + Content model: Text. *) 72 + 73 + val output : Element_spec.t 74 + (** The output element represents the result of a calculation performed by the 75 + application, or the result of a user action. 76 + 77 + Content model: Phrasing content. *) 78 + 79 + val progress : Element_spec.t 80 + (** The progress element represents the completion progress of a task. 81 + 82 + Content model: Phrasing content, but must not contain progress element 83 + descendants. *) 84 + 85 + val meter : Element_spec.t 86 + (** The meter element represents a scalar measurement within a known range, or a 87 + fractional value; for example disk usage, the relevance of a query result, or 88 + the fraction of a voting population to have selected a particular candidate. 89 + 90 + Content model: Phrasing content, but must not contain meter element 91 + descendants. *) 92 + 93 + val fieldset : Element_spec.t 94 + (** The fieldset element represents a set of form controls (or other content) 95 + grouped together, optionally with a caption. 96 + 97 + Content model: Optionally a legend element, followed by flow content. *) 98 + 99 + val legend : Element_spec.t 100 + (** The legend element represents a caption for the rest of the contents of the 101 + legend element's parent fieldset element, if any. 102 + 103 + Content model: Phrasing content, and optionally intermixed with heading 104 + content. *) 105 + 106 + (** {1 Element List} *) 107 + 108 + val all : Element_spec.t list 109 + (** List of all form element specifications. *)
+82
lib/html5_checker/content_model/elements_interactive.ml
··· 1 + open Content_category 2 + open Content_model 3 + open Attr_spec 4 + 5 + let details = 6 + Element_spec.make ~name:"details" 7 + ~categories:[ Flow; Sectioning; Palpable; Interactive ] 8 + ~content_model: 9 + (Sequence [ Elements [ "summary" ]; Zero_or_more (Categories [ Flow ]) ]) 10 + ~attrs:[ make "open" ~datatype:"boolean" () ] () 11 + 12 + let summary = 13 + Element_spec.make ~name:"summary" 14 + ~categories:[] 15 + ~content_model: 16 + (Choice [ Categories [ Phrasing ]; Categories [ Heading ] ]) 17 + ~permitted_parents:[ "details" ] 18 + ~attrs:[] () 19 + 20 + let dialog = 21 + Element_spec.make ~name:"dialog" 22 + ~categories:[ Flow; Sectioning ] 23 + ~content_model:(Categories [ Flow ]) 24 + ~attrs:[ make "open" ~datatype:"boolean" () ] ~implicit_aria_role:"dialog" () 25 + 26 + let script = 27 + Element_spec.make ~name:"script" 28 + ~categories:[ Metadata; Flow; Phrasing; Script_supporting ] 29 + ~content_model:Text 30 + ~attrs: 31 + [ 32 + make "src" ~datatype:"url" (); 33 + make "type" (); 34 + make "nomodule" ~datatype:"boolean" (); 35 + make "async" ~datatype:"boolean" (); 36 + make "defer" ~datatype:"boolean" (); 37 + make "crossorigin" ~datatype:"crossorigin" (); 38 + make "integrity" ~datatype:"integrity" (); 39 + make "referrerpolicy" ~datatype:"referrer" (); 40 + make "blocking" (); 41 + make "fetchpriority" ~datatype:"fetchpriority" (); 42 + ] 43 + () 44 + 45 + let noscript = 46 + Element_spec.make ~name:"noscript" 47 + ~categories:[ Metadata; Flow; Phrasing ] 48 + ~content_model: 49 + (Choice 50 + [ 51 + (* In head: link, style, meta *) 52 + Categories [ Metadata ]; 53 + (* In body: transparent, but no noscript descendants *) 54 + Transparent; 55 + ]) 56 + ~prohibited_ancestors:[ "noscript" ] 57 + ~attrs:[] () 58 + 59 + let template = 60 + Element_spec.make ~name:"template" 61 + ~categories:[ Metadata; Flow; Phrasing; Script_supporting ] 62 + ~content_model:Nothing 63 + ~attrs:[] () 64 + 65 + let slot = 66 + Element_spec.make ~name:"slot" 67 + ~categories:[ Flow; Phrasing ] 68 + ~content_model:Transparent 69 + ~attrs:[ make "name" () ] () 70 + 71 + let canvas = 72 + Element_spec.make ~name:"canvas" 73 + ~categories:[ Flow; Phrasing; Embedded; Palpable ] 74 + ~content_model:Transparent 75 + ~attrs: 76 + [ 77 + make "width" ~datatype:"integer" (); 78 + make "height" ~datatype:"integer" (); 79 + ] 80 + () 81 + 82 + let all = [ details; summary; dialog; script; noscript; template; slot; canvas ]
+41
lib/html5_checker/content_model/elements_interactive.mli
··· 1 + (** HTML5 interactive and scripting element specifications. 2 + 3 + Interactive elements are specifically intended for user interaction, 4 + and scripting elements support scripts in the document. 5 + See https://html.spec.whatwg.org/multipage/interactive-elements.html 6 + and https://html.spec.whatwg.org/multipage/scripting.html *) 7 + 8 + val details : Element_spec.t 9 + (** The details element represents a disclosure widget from which the user can 10 + obtain additional information or controls. *) 11 + 12 + val summary : Element_spec.t 13 + (** The summary element represents a summary, caption, or legend for the rest 14 + of the contents of the summary element's parent details element. *) 15 + 16 + val dialog : Element_spec.t 17 + (** The dialog element represents a part of an application that a user 18 + interacts with to perform a task, such as a dialog box or modal. *) 19 + 20 + val script : Element_spec.t 21 + (** The script element allows authors to include dynamic script and data blocks 22 + in their documents. *) 23 + 24 + val noscript : Element_spec.t 25 + (** The noscript element represents fallback content for when scripting is 26 + disabled or not supported. *) 27 + 28 + val template : Element_spec.t 29 + (** The template element is used to declare fragments of HTML that can be cloned 30 + and inserted in the document by script. *) 31 + 32 + val slot : Element_spec.t 33 + (** The slot element is used as a placeholder inside a web component that users 34 + can fill with their own markup. *) 35 + 36 + val canvas : Element_spec.t 37 + (** The canvas element provides scripts with a resolution-dependent bitmap 38 + canvas for rendering graphs, game graphics, art, or other visual images. *) 39 + 40 + val all : Element_spec.t list 41 + (** List of all interactive and scripting element specifications. *)
+133
lib/html5_checker/content_model/elements_table.ml
··· 1 + open Content_category 2 + open Content_model 3 + open Attr_spec 4 + 5 + let table = 6 + Element_spec.make ~name:"table" 7 + ~categories:[ Flow; Palpable ] 8 + ~content_model: 9 + (Sequence 10 + [ 11 + Optional (Elements [ "caption" ]); 12 + Zero_or_more (Elements [ "colgroup" ]); 13 + Optional (Elements [ "thead" ]); 14 + Choice 15 + [ 16 + Sequence 17 + [ 18 + Zero_or_more (Elements [ "tbody" ]); 19 + Optional (Elements [ "tfoot" ]); 20 + ]; 21 + Sequence 22 + [ 23 + Optional (Elements [ "tfoot" ]); 24 + Zero_or_more (Elements [ "tbody" ]); 25 + ]; 26 + One_or_more (Elements [ "tr" ]); 27 + ]; 28 + Zero_or_more (Elements [ "script"; "template" ]); 29 + ]) 30 + ~attrs:[] ~implicit_aria_role:"table" () 31 + 32 + let caption = 33 + Element_spec.make ~name:"caption" 34 + ~categories:[] 35 + ~content_model:(Categories [ Flow ]) 36 + ~permitted_parents:[ "table" ] 37 + ~prohibited_ancestors:[ "table" ] 38 + ~attrs:[] () 39 + 40 + let colgroup = 41 + Element_spec.make ~name:"colgroup" 42 + ~categories:[] 43 + ~content_model: 44 + (Choice 45 + [ 46 + Zero_or_more (Elements [ "col" ]); 47 + Zero_or_more (Elements [ "template" ]); 48 + ]) 49 + ~permitted_parents:[ "table" ] 50 + ~attrs:[ make "span" ~datatype:"integer" () ] () 51 + 52 + let col = 53 + Element_spec.make ~name:"col" ~void:true 54 + ~categories:[] 55 + ~content_model:Nothing 56 + ~permitted_parents:[ "colgroup" ] 57 + ~attrs:[ make "span" ~datatype:"integer" () ] () 58 + 59 + let tbody = 60 + Element_spec.make ~name:"tbody" 61 + ~categories:[] 62 + ~content_model: 63 + (Choice 64 + [ 65 + One_or_more (Elements [ "tr" ]); Zero_or_more (Elements [ "script"; "template" ]); 66 + ]) 67 + ~permitted_parents:[ "table" ] 68 + ~attrs:[] ~implicit_aria_role:"rowgroup" () 69 + 70 + let thead = 71 + Element_spec.make ~name:"thead" 72 + ~categories:[] 73 + ~content_model: 74 + (Choice 75 + [ 76 + One_or_more (Elements [ "tr" ]); Zero_or_more (Elements [ "script"; "template" ]); 77 + ]) 78 + ~permitted_parents:[ "table" ] 79 + ~attrs:[] ~implicit_aria_role:"rowgroup" () 80 + 81 + let tfoot = 82 + Element_spec.make ~name:"tfoot" 83 + ~categories:[] 84 + ~content_model: 85 + (Choice 86 + [ 87 + One_or_more (Elements [ "tr" ]); Zero_or_more (Elements [ "script"; "template" ]); 88 + ]) 89 + ~permitted_parents:[ "table" ] 90 + ~attrs:[] ~implicit_aria_role:"rowgroup" () 91 + 92 + let tr = 93 + Element_spec.make ~name:"tr" 94 + ~categories:[] 95 + ~content_model: 96 + (Choice 97 + [ 98 + One_or_more (Elements [ "td"; "th" ]); 99 + Zero_or_more (Elements [ "script"; "template" ]); 100 + ]) 101 + ~permitted_parents:[ "table"; "thead"; "tbody"; "tfoot" ] 102 + ~attrs:[] ~implicit_aria_role:"row" () 103 + 104 + let td = 105 + Element_spec.make ~name:"td" 106 + ~categories:[ Sectioning ] 107 + ~content_model:(Categories [ Flow ]) 108 + ~permitted_parents:[ "tr" ] 109 + ~attrs: 110 + [ 111 + make "colspan" ~datatype:"integer" (); 112 + make "rowspan" ~datatype:"integer" (); 113 + make "headers" (); 114 + ] 115 + ~implicit_aria_role:"cell" () 116 + 117 + let th = 118 + Element_spec.make ~name:"th" 119 + ~categories:[] 120 + ~content_model:(Mixed [ Flow ]) 121 + ~prohibited_ancestors:[ "header"; "footer" ] 122 + ~permitted_parents:[ "tr" ] 123 + ~attrs: 124 + [ 125 + make "colspan" ~datatype:"integer" (); 126 + make "rowspan" ~datatype:"integer" (); 127 + make "headers" (); 128 + make "scope" ~datatype:"scope" (); 129 + make "abbr" (); 130 + ] 131 + ~implicit_aria_role:"columnheader" () 132 + 133 + let all = [ table; caption; colgroup; col; tbody; thead; tfoot; tr; td; th ]
+42
lib/html5_checker/content_model/elements_table.mli
··· 1 + (** HTML5 table element specifications. 2 + 3 + Table elements represent data with more than one dimension. 4 + See https://html.spec.whatwg.org/multipage/tables.html *) 5 + 6 + val table : Element_spec.t 7 + (** The table element represents data with more than one dimension in the form 8 + of a table. *) 9 + 10 + val caption : Element_spec.t 11 + (** The caption element represents the title of the table. *) 12 + 13 + val colgroup : Element_spec.t 14 + (** The colgroup element represents a group of one or more columns in the table. *) 15 + 16 + val col : Element_spec.t 17 + (** The col element represents one or more columns in the table. It is a void 18 + element. *) 19 + 20 + val tbody : Element_spec.t 21 + (** The tbody element represents a block of rows that consist of a body of data 22 + for the table. *) 23 + 24 + val thead : Element_spec.t 25 + (** The thead element represents the block of rows that consist of the column 26 + labels (headers) for the table. *) 27 + 28 + val tfoot : Element_spec.t 29 + (** The tfoot element represents the block of rows that consist of the column 30 + summaries (footers) for the table. *) 31 + 32 + val tr : Element_spec.t 33 + (** The tr element represents a row of cells in a table. *) 34 + 35 + val td : Element_spec.t 36 + (** The td element represents a data cell in a table. *) 37 + 38 + val th : Element_spec.t 39 + (** The th element represents a header cell in a table. *) 40 + 41 + val all : Element_spec.t list 42 + (** List of all table element specifications. *)
+231
lib/html5_checker/content_model/elements_text.ml
··· 1 + open Content_category 2 + open Content_model 3 + 4 + let a = 5 + Element_spec.make ~name:"a" 6 + ~categories:[Flow; Phrasing; Interactive; Palpable] 7 + ~content_model:(Except (Transparent, [Interactive])) 8 + ~attrs:[ 9 + Attr_spec.make "href" ~datatype:"url" (); 10 + Attr_spec.make "target" ~datatype:"target" (); 11 + Attr_spec.make "download" ~datatype:"string" (); 12 + Attr_spec.make "ping" ~datatype:"string" (); 13 + Attr_spec.make "referrerpolicy" ~datatype:"referrerpolicy" (); 14 + Attr_spec.make "rel" ~datatype:"relationship" (); 15 + Attr_spec.make "hreflang" ~datatype:"string" (); 16 + Attr_spec.make "type" ~datatype:"mimetype" (); 17 + ] 18 + ~implicit_aria_role:"link" 19 + () 20 + 21 + let em = 22 + Element_spec.make ~name:"em" 23 + ~categories:[Flow; Phrasing; Palpable] 24 + ~content_model:(Categories [Phrasing]) 25 + () 26 + 27 + let strong = 28 + Element_spec.make ~name:"strong" 29 + ~categories:[Flow; Phrasing; Palpable] 30 + ~content_model:(Categories [Phrasing]) 31 + () 32 + 33 + let small = 34 + Element_spec.make ~name:"small" 35 + ~categories:[Flow; Phrasing; Palpable] 36 + ~content_model:(Categories [Phrasing]) 37 + () 38 + 39 + let s = 40 + Element_spec.make ~name:"s" 41 + ~categories:[Flow; Phrasing; Palpable] 42 + ~content_model:(Categories [Phrasing]) 43 + () 44 + 45 + let cite = 46 + Element_spec.make ~name:"cite" 47 + ~categories:[Flow; Phrasing; Palpable] 48 + ~content_model:(Categories [Phrasing]) 49 + () 50 + 51 + let q = 52 + Element_spec.make ~name:"q" 53 + ~categories:[Flow; Phrasing; Palpable] 54 + ~content_model:(Categories [Phrasing]) 55 + ~attrs:[ 56 + Attr_spec.make "cite" ~datatype:"url" (); 57 + ] 58 + () 59 + 60 + let dfn = 61 + Element_spec.make ~name:"dfn" 62 + ~categories:[Flow; Phrasing; Palpable] 63 + ~content_model:(Except (Categories [Phrasing], [])) 64 + ~prohibited_ancestors:["dfn"] 65 + () 66 + 67 + let abbr = 68 + Element_spec.make ~name:"abbr" 69 + ~categories:[Flow; Phrasing; Palpable] 70 + ~content_model:(Categories [Phrasing]) 71 + () 72 + 73 + let ruby = 74 + Element_spec.make ~name:"ruby" 75 + ~categories:[Flow; Phrasing; Palpable] 76 + ~content_model:(Categories [Phrasing]) 77 + () 78 + 79 + let rt = 80 + Element_spec.make ~name:"rt" 81 + ~categories:[Phrasing] 82 + ~content_model:(Categories [Phrasing]) 83 + ~permitted_parents:["ruby"; "rtc"] 84 + () 85 + 86 + let rp = 87 + Element_spec.make ~name:"rp" 88 + ~categories:[] 89 + ~content_model:Text 90 + ~permitted_parents:["ruby"; "rtc"] 91 + () 92 + 93 + let data = 94 + Element_spec.make ~name:"data" 95 + ~categories:[Flow; Phrasing; Palpable] 96 + ~content_model:(Categories [Phrasing]) 97 + ~attrs:[ 98 + Attr_spec.make "value" ~required:true ~datatype:"string" (); 99 + ] 100 + () 101 + 102 + let time = 103 + Element_spec.make ~name:"time" 104 + ~categories:[Flow; Phrasing; Palpable] 105 + ~content_model:(Categories [Phrasing]) 106 + ~attrs:[ 107 + Attr_spec.make "datetime" ~datatype:"datetime" (); 108 + ] 109 + ~prohibited_ancestors:["time"] 110 + () 111 + 112 + let code = 113 + Element_spec.make ~name:"code" 114 + ~categories:[Flow; Phrasing; Palpable] 115 + ~content_model:(Categories [Phrasing]) 116 + () 117 + 118 + let var = 119 + Element_spec.make ~name:"var" 120 + ~categories:[Flow; Phrasing; Palpable] 121 + ~content_model:(Categories [Phrasing]) 122 + () 123 + 124 + let samp = 125 + Element_spec.make ~name:"samp" 126 + ~categories:[Flow; Phrasing; Palpable] 127 + ~content_model:(Categories [Phrasing]) 128 + () 129 + 130 + let kbd = 131 + Element_spec.make ~name:"kbd" 132 + ~categories:[Flow; Phrasing; Palpable] 133 + ~content_model:(Categories [Phrasing]) 134 + () 135 + 136 + let sub = 137 + Element_spec.make ~name:"sub" 138 + ~categories:[Flow; Phrasing; Palpable] 139 + ~content_model:(Categories [Phrasing]) 140 + () 141 + 142 + let sup = 143 + Element_spec.make ~name:"sup" 144 + ~categories:[Flow; Phrasing; Palpable] 145 + ~content_model:(Categories [Phrasing]) 146 + () 147 + 148 + let i = 149 + Element_spec.make ~name:"i" 150 + ~categories:[Flow; Phrasing; Palpable] 151 + ~content_model:(Categories [Phrasing]) 152 + () 153 + 154 + let b = 155 + Element_spec.make ~name:"b" 156 + ~categories:[Flow; Phrasing; Palpable] 157 + ~content_model:(Categories [Phrasing]) 158 + () 159 + 160 + let u = 161 + Element_spec.make ~name:"u" 162 + ~categories:[Flow; Phrasing; Palpable] 163 + ~content_model:(Categories [Phrasing]) 164 + () 165 + 166 + let mark = 167 + Element_spec.make ~name:"mark" 168 + ~categories:[Flow; Phrasing; Palpable] 169 + ~content_model:(Categories [Phrasing]) 170 + () 171 + 172 + let bdi = 173 + Element_spec.make ~name:"bdi" 174 + ~categories:[Flow; Phrasing; Palpable] 175 + ~content_model:(Categories [Phrasing]) 176 + () 177 + 178 + let bdo = 179 + Element_spec.make ~name:"bdo" 180 + ~categories:[Flow; Phrasing; Palpable] 181 + ~content_model:(Categories [Phrasing]) 182 + ~attrs:[ 183 + Attr_spec.make "dir" ~required:true ~datatype:"dir" (); 184 + ] 185 + () 186 + 187 + let span = 188 + Element_spec.make ~name:"span" 189 + ~categories:[Flow; Phrasing; Palpable] 190 + ~content_model:(Categories [Phrasing]) 191 + () 192 + 193 + let br = 194 + Element_spec.make ~name:"br" 195 + ~void:true 196 + ~categories:[Flow; Phrasing] 197 + ~content_model:Nothing 198 + () 199 + 200 + let wbr = 201 + Element_spec.make ~name:"wbr" 202 + ~void:true 203 + ~categories:[Flow; Phrasing] 204 + ~content_model:Nothing 205 + () 206 + 207 + let ins = 208 + Element_spec.make ~name:"ins" 209 + ~categories:[Flow; Phrasing; Palpable] 210 + ~content_model:Transparent 211 + ~attrs:[ 212 + Attr_spec.make "cite" ~datatype:"url" (); 213 + Attr_spec.make "datetime" ~datatype:"datetime" (); 214 + ] 215 + () 216 + 217 + let del = 218 + Element_spec.make ~name:"del" 219 + ~categories:[Flow; Phrasing] 220 + ~content_model:Transparent 221 + ~attrs:[ 222 + Attr_spec.make "cite" ~datatype:"url" (); 223 + Attr_spec.make "datetime" ~datatype:"datetime" (); 224 + ] 225 + () 226 + 227 + let all = [ 228 + a; em; strong; small; s; cite; q; dfn; abbr; ruby; rt; rp; 229 + data; time; code; var; samp; kbd; sub; sup; i; b; u; mark; 230 + bdi; bdo; span; br; wbr; ins; del 231 + ]
+204
lib/html5_checker/content_model/elements_text.mli
··· 1 + (** HTML5 text-level and edit element specifications. 2 + 3 + Text-level semantic elements and edit tracking elements. 4 + See https://html.spec.whatwg.org/multipage/text-level-semantics.html 5 + and https://html.spec.whatwg.org/multipage/edits.html *) 6 + 7 + (** {1 Hyperlinks} *) 8 + 9 + val a : Element_spec.t 10 + (** The a element represents a hyperlink. When it has an href attribute, it 11 + represents a hyperlink (a hypertext anchor) labeled by its contents. 12 + 13 + Content model: Transparent, but must not contain interactive content 14 + descendants. *) 15 + 16 + (** {1 Text-level Semantics} *) 17 + 18 + val em : Element_spec.t 19 + (** The em element represents stress emphasis of its contents. The level of 20 + emphasis is given by its number of ancestor em elements. 21 + 22 + Content model: Phrasing content. *) 23 + 24 + val strong : Element_spec.t 25 + (** The strong element represents strong importance, seriousness, or urgency 26 + for its contents. 27 + 28 + Content model: Phrasing content. *) 29 + 30 + val small : Element_spec.t 31 + (** The small element represents side comments such as small print. Small print 32 + typically features disclaimers, caveats, legal restrictions, or copyrights. 33 + 34 + Content model: Phrasing content. *) 35 + 36 + val s : Element_spec.t 37 + (** The s element represents contents that are no longer accurate or no longer 38 + relevant. 39 + 40 + Content model: Phrasing content. *) 41 + 42 + val cite : Element_spec.t 43 + (** The cite element represents the title of a work (e.g. a book, a paper, 44 + an essay, a poem, a score, a song, a script, a film, a TV show, a game, 45 + a sculpture, a painting, a theatre production, a play, an opera, a musical, 46 + an exhibition, a legal case report, a computer program, etc). 47 + 48 + Content model: Phrasing content. *) 49 + 50 + val q : Element_spec.t 51 + (** The q element represents some phrasing content quoted from another source. 52 + 53 + Content model: Phrasing content. *) 54 + 55 + val dfn : Element_spec.t 56 + (** The dfn element represents the defining instance of a term. 57 + 58 + Content model: Phrasing content, but must not contain dfn element descendants. *) 59 + 60 + val abbr : Element_spec.t 61 + (** The abbr element represents an abbreviation or acronym, optionally with its 62 + expansion. The title attribute may be used to provide an expansion of the 63 + abbreviation. 64 + 65 + Content model: Phrasing content. *) 66 + 67 + val ruby : Element_spec.t 68 + (** The ruby element allows one or more spans of phrasing content to be marked 69 + with ruby annotations. Ruby annotations are short runs of text presented 70 + alongside base text, primarily used in East Asian typography. 71 + 72 + Content model: Phrasing content, but must contain at least one rt or rp 73 + element. *) 74 + 75 + val rt : Element_spec.t 76 + (** The rt element marks the ruby text component of a ruby annotation. 77 + 78 + Content model: Phrasing content. *) 79 + 80 + val rp : Element_spec.t 81 + (** The rp element is used to provide fallback parentheses for browsers that 82 + don't support ruby annotations. 83 + 84 + Content model: Text, or phrasing content that represents what can be used 85 + as fallback in annotations. *) 86 + 87 + val data : Element_spec.t 88 + (** The data element represents its contents, along with a machine-readable 89 + form of those contents in the value attribute. 90 + 91 + Content model: Phrasing content. *) 92 + 93 + val time : Element_spec.t 94 + (** The time element represents its contents, along with a machine-readable 95 + form of those contents in the datetime attribute. 96 + 97 + Content model: Phrasing content, but must not contain time element descendants. *) 98 + 99 + val code : Element_spec.t 100 + (** The code element represents a fragment of computer code. 101 + 102 + Content model: Phrasing content. *) 103 + 104 + val var : Element_spec.t 105 + (** The var element represents a variable in a mathematical expression or a 106 + programming context. 107 + 108 + Content model: Phrasing content. *) 109 + 110 + val samp : Element_spec.t 111 + (** The samp element represents sample or quoted output from another program 112 + or computing system. 113 + 114 + Content model: Phrasing content. *) 115 + 116 + val kbd : Element_spec.t 117 + (** The kbd element represents user input (typically keyboard input, although 118 + it may also be used to represent other input, such as voice commands). 119 + 120 + Content model: Phrasing content. *) 121 + 122 + val sub : Element_spec.t 123 + (** The sub element represents a subscript. 124 + 125 + Content model: Phrasing content. *) 126 + 127 + val sup : Element_spec.t 128 + (** The sup element represents a superscript. 129 + 130 + Content model: Phrasing content. *) 131 + 132 + val i : Element_spec.t 133 + (** The i element represents a span of text in an alternate voice or mood, or 134 + otherwise offset from the normal prose in a manner indicating a different 135 + quality of text. 136 + 137 + Content model: Phrasing content. *) 138 + 139 + val b : Element_spec.t 140 + (** The b element represents a span of text to which attention is being drawn 141 + for utilitarian purposes without conveying any extra importance and with no 142 + implication of an alternate voice or mood. 143 + 144 + Content model: Phrasing content. *) 145 + 146 + val u : Element_spec.t 147 + (** The u element represents a span of text with an unarticulated, though 148 + explicitly rendered, non-textual annotation, such as labeling the text as 149 + being a proper name in Chinese text or labeling the text as being 150 + misspelt. 151 + 152 + Content model: Phrasing content. *) 153 + 154 + val mark : Element_spec.t 155 + (** The mark element represents a run of text in one document marked or 156 + highlighted for reference purposes, due to its relevance in another context. 157 + 158 + Content model: Phrasing content. *) 159 + 160 + val bdi : Element_spec.t 161 + (** The bdi element represents a span of text that is to be isolated from its 162 + surroundings for the purposes of bidirectional text formatting. 163 + 164 + Content model: Phrasing content. *) 165 + 166 + val bdo : Element_spec.t 167 + (** The bdo element represents explicit text directionality formatting control 168 + for its children. It allows authors to override the Unicode bidirectional 169 + algorithm. 170 + 171 + Content model: Phrasing content. *) 172 + 173 + val span : Element_spec.t 174 + (** The span element doesn't mean anything on its own, but can be useful when 175 + used together with the global attributes, e.g. class, lang, or dir. 176 + 177 + Content model: Phrasing content. *) 178 + 179 + val br : Element_spec.t 180 + (** The br element represents a line break. It is a void element. 181 + 182 + Content model: Nothing (void element). *) 183 + 184 + val wbr : Element_spec.t 185 + (** The wbr element represents a line break opportunity. It is a void element. 186 + 187 + Content model: Nothing (void element). *) 188 + 189 + (** {1 Edits} *) 190 + 191 + val ins : Element_spec.t 192 + (** The ins element represents an addition to the document. 193 + 194 + Content model: Transparent. *) 195 + 196 + val del : Element_spec.t 197 + (** The del element represents a removal from the document. 198 + 199 + Content model: Transparent. *) 200 + 201 + (** {1 Element List} *) 202 + 203 + val all : Element_spec.t list 204 + (** List of all text-level and edit element specifications. *)
lib/html5_checker/datatype/datatype.cmi

This is a binary file and will not be displayed.

+42
lib/html5_checker/datatype/datatype.ml
··· 1 + module type S = sig 2 + val name : string 3 + val validate : string -> (unit, string) result 4 + val is_valid : string -> bool 5 + end 6 + 7 + type t = (module S) 8 + 9 + let name (module D : S) = D.name 10 + let validate (module D : S) s = D.validate s 11 + let is_valid (module D : S) s = D.is_valid s 12 + 13 + (* Helper utilities *) 14 + 15 + let is_whitespace = function 16 + | ' ' | '\t' | '\n' | '\r' | '\012' (* FF *) -> true 17 + | _ -> false 18 + 19 + let is_ascii_digit = function '0' .. '9' -> true | _ -> false 20 + 21 + let to_ascii_lowercase c = 22 + match c with 'A' .. 'Z' -> Char.chr (Char.code c + 32) | _ -> c 23 + 24 + let string_to_ascii_lowercase s = 25 + String.map to_ascii_lowercase s 26 + 27 + let trim_html_spaces s = 28 + let len = String.length s in 29 + let rec find_start i = 30 + if i >= len then len 31 + else if is_whitespace s.[i] then find_start (i + 1) 32 + else i 33 + in 34 + let rec find_end i = 35 + if i < 0 then -1 36 + else if is_whitespace s.[i] then find_end (i - 1) 37 + else i 38 + in 39 + let start = find_start 0 in 40 + let end_pos = find_end (len - 1) in 41 + if start > end_pos then "" 42 + else String.sub s start (end_pos - start + 1)
+45
lib/html5_checker/datatype/datatype.mli
··· 1 + (** HTML5 datatype validation. 2 + 3 + This module provides the base interface for HTML5 attribute datatype 4 + validators. Each datatype validates string values according to HTML5 spec. *) 5 + 6 + (** A datatype validator *) 7 + module type S = sig 8 + (** Name of this datatype (e.g., "integer", "url") *) 9 + val name : string 10 + 11 + (** Validate a string value. Returns Ok () if valid, Error message otherwise *) 12 + val validate : string -> (unit, string) result 13 + 14 + (** Check if value is valid (convenience function) *) 15 + val is_valid : string -> bool 16 + end 17 + 18 + (** A datatype packed as a first-class module *) 19 + type t = (module S) 20 + 21 + (** Get the name of a datatype *) 22 + val name : t -> string 23 + 24 + (** Validate a value with a datatype *) 25 + val validate : t -> string -> (unit, string) result 26 + 27 + (** Check if a value is valid *) 28 + val is_valid : t -> string -> bool 29 + 30 + (** Helper utilities for implementing datatype validators. *) 31 + 32 + (** Check if a character is HTML5 whitespace (space, tab, LF, FF, or CR). *) 33 + val is_whitespace : char -> bool 34 + 35 + (** Check if a character is an ASCII digit (0-9). *) 36 + val is_ascii_digit : char -> bool 37 + 38 + (** Convert an ASCII character to lowercase. *) 39 + val to_ascii_lowercase : char -> char 40 + 41 + (** Convert an ASCII string to lowercase. *) 42 + val string_to_ascii_lowercase : string -> string 43 + 44 + (** Trim HTML5 whitespace from both ends of a string. *) 45 + val trim_html_spaces : string -> string
+31
lib/html5_checker/datatype/datatype_registry.ml
··· 1 + type t = { datatypes : (string, Datatype.t) Hashtbl.t } 2 + 3 + let create () = { datatypes = Hashtbl.create 16 } 4 + 5 + let register t dt = 6 + let name = Datatype.name dt in 7 + Hashtbl.replace t.datatypes name dt 8 + 9 + let get t name = Hashtbl.find_opt t.datatypes name 10 + 11 + let list_names t = 12 + Hashtbl.fold (fun name _ acc -> name :: acc) t.datatypes [] 13 + |> List.sort String.compare 14 + 15 + let default = 16 + let registry = ref None in 17 + fun () -> 18 + match !registry with 19 + | Some r -> r 20 + | None -> 21 + let r = create () in 22 + (* Register built-in datatypes *) 23 + register r (module Dt_integer.Integer : Datatype.S); 24 + register r (module Dt_integer.Integer_non_negative : Datatype.S); 25 + register r (module Dt_integer.Integer_positive : Datatype.S); 26 + register r (module Dt_float.Float_ : Datatype.S); 27 + register r (module Dt_float.Float_non_negative : Datatype.S); 28 + register r (module Dt_float.Float_positive : Datatype.S); 29 + register r (module Dt_boolean.Boolean : Datatype.S); 30 + registry := Some r; 31 + r
+19
lib/html5_checker/datatype/datatype_registry.mli
··· 1 + (** Registry for HTML5 datatypes *) 2 + 3 + (** Registry type that holds datatypes indexed by name *) 4 + type t 5 + 6 + (** Create a new empty registry *) 7 + val create : unit -> t 8 + 9 + (** Register a datatype in the registry *) 10 + val register : t -> Datatype.t -> unit 11 + 12 + (** Get a datatype by name. Returns None if not found *) 13 + val get : t -> string -> Datatype.t option 14 + 15 + (** List all registered datatype names *) 16 + val list_names : t -> string list 17 + 18 + (** Default registry with all built-in datatypes *) 19 + val default : unit -> t
+229
lib/html5_checker/datatype/dt_autocomplete.ml
··· 1 + (** Autocomplete attribute validation based on HTML5 spec *) 2 + 3 + (** Check if character is whitespace *) 4 + let is_whitespace c = c = ' ' || c = '\t' || c = '\n' || c = '\r' 5 + 6 + (** Convert character to ASCII lowercase *) 7 + let to_ascii_lowercase c = 8 + if c >= 'A' && c <= 'Z' then Char.chr (Char.code c + 32) else c 9 + 10 + (** Trim whitespace from string *) 11 + let trim_whitespace s = 12 + let s = String.trim s in 13 + (* Also collapse internal whitespace *) 14 + let buf = Buffer.create (String.length s) in 15 + let in_space = ref false in 16 + String.iter 17 + (fun c -> 18 + if is_whitespace c then 19 + if not !in_space then ( 20 + Buffer.add_char buf ' '; 21 + in_space := true) 22 + else ( 23 + Buffer.add_char buf (to_ascii_lowercase c); 24 + in_space := false)) 25 + s; 26 + Buffer.contents buf 27 + 28 + (** Contact type tokens *) 29 + let contact_types = [ "home"; "work"; "mobile"; "fax"; "pager" ] 30 + 31 + (** All autofill field names *) 32 + let all_field_names = 33 + [ 34 + "name"; 35 + "honorific-prefix"; 36 + "given-name"; 37 + "additional-name"; 38 + "family-name"; 39 + "honorific-suffix"; 40 + "nickname"; 41 + "organization-title"; 42 + "username"; 43 + "new-password"; 44 + "current-password"; 45 + "one-time-code"; 46 + "organization"; 47 + "street-address"; 48 + "address-line1"; 49 + "address-line2"; 50 + "address-line3"; 51 + "address-level4"; 52 + "address-level3"; 53 + "address-level2"; 54 + "address-level1"; 55 + "country"; 56 + "country-name"; 57 + "postal-code"; 58 + "cc-name"; 59 + "cc-given-name"; 60 + "cc-additional-name"; 61 + "cc-family-name"; 62 + "cc-number"; 63 + "cc-exp"; 64 + "cc-exp-month"; 65 + "cc-exp-year"; 66 + "cc-csc"; 67 + "cc-type"; 68 + "transaction-currency"; 69 + "transaction-amount"; 70 + "language"; 71 + "bday"; 72 + "bday-day"; 73 + "bday-month"; 74 + "bday-year"; 75 + "sex"; 76 + "url"; 77 + "photo"; 78 + "tel"; 79 + "tel-country-code"; 80 + "tel-national"; 81 + "tel-area-code"; 82 + "tel-local"; 83 + "tel-local-prefix"; 84 + "tel-local-suffix"; 85 + "tel-extension"; 86 + "email"; 87 + "impp"; 88 + ] 89 + 90 + (** Contact field names (subset that can be used with contact types) *) 91 + let contact_field_names = 92 + [ 93 + "tel"; 94 + "tel-country-code"; 95 + "tel-national"; 96 + "tel-area-code"; 97 + "tel-local"; 98 + "tel-local-prefix"; 99 + "tel-local-suffix"; 100 + "tel-extension"; 101 + "email"; 102 + "impp"; 103 + ] 104 + 105 + (** Split string on whitespace *) 106 + let split_on_whitespace s = 107 + let rec split acc start i = 108 + if i >= String.length s then 109 + if start < i then List.rev (String.sub s start (i - start) :: acc) 110 + else List.rev acc 111 + else if is_whitespace s.[i] then 112 + if start < i then 113 + split (String.sub s start (i - start) :: acc) (i + 1) (i + 1) 114 + else split acc (i + 1) (i + 1) 115 + else split acc start (i + 1) 116 + in 117 + split [] 0 0 118 + 119 + (** Check if string starts with prefix *) 120 + let starts_with s prefix = 121 + String.length s >= String.length prefix 122 + && String.sub s 0 (String.length prefix) = prefix 123 + 124 + (** Validate detail tokens *) 125 + let check_tokens tokens = 126 + let tokens = ref tokens in 127 + let is_contact_details = ref false in 128 + 129 + (* Check for section-* *) 130 + (match !tokens with 131 + | token :: rest when starts_with token "section-" -> 132 + tokens := rest 133 + | _ -> ()); 134 + 135 + (* Check for shipping/billing *) 136 + (match !tokens with 137 + | "shipping" :: rest | "billing" :: rest -> 138 + tokens := rest 139 + | _ -> ()); 140 + 141 + (* Check for contact type *) 142 + (match !tokens with 143 + | token :: rest when List.mem token contact_types -> 144 + tokens := rest; 145 + is_contact_details := true 146 + | _ -> ()); 147 + 148 + (* Process remaining tokens *) 149 + let process_field_tokens = function 150 + | [] -> Error "A list of autofill details tokens must contain an autofill field name" 151 + | [ "webauthn" ] -> 152 + Error 153 + "The token \"webauthn\" must not be the only token in a list of \ 154 + autofill detail tokens" 155 + | [ field_name ] -> 156 + if not (List.mem field_name all_field_names) then 157 + Error 158 + (Printf.sprintf 159 + "The string \"%s\" is not a valid autofill field name" 160 + field_name) 161 + else if !is_contact_details && not (List.mem field_name contact_field_names) 162 + then 163 + Error 164 + (Printf.sprintf 165 + "The autofill field name \"%s\" is not allowed in contact \ 166 + context" 167 + field_name) 168 + else Ok () 169 + | [ field_name; "webauthn" ] -> 170 + if not (List.mem field_name all_field_names) then 171 + Error 172 + (Printf.sprintf 173 + "The string \"%s\" is not a valid autofill field name" 174 + field_name) 175 + else if !is_contact_details && not (List.mem field_name contact_field_names) 176 + then 177 + Error 178 + (Printf.sprintf 179 + "The autofill field name \"%s\" is not allowed in contact \ 180 + context" 181 + field_name) 182 + else Ok () 183 + | token :: _ when List.mem token contact_types -> 184 + Error 185 + (Printf.sprintf 186 + "The token \"%s\" must only appear before any autofill field names" 187 + token) 188 + | token :: _ when starts_with token "section-" -> 189 + Error 190 + "A \"section-*\" indicator must only appear as the first token in a \ 191 + list of autofill detail tokens" 192 + | "shipping" :: _ | "billing" :: _ as toks -> 193 + Error 194 + (Printf.sprintf 195 + "The token \"%s\" must only appear as either the first token in a \ 196 + list of autofill detail tokens, or, if the first token is a \ 197 + \"section-*\" indicator, as the second token" 198 + (List.hd toks)) 199 + | _ :: "webauthn" :: _ :: _ -> 200 + Error 201 + "The token \"webauthn\" must only appear as the very last token in a \ 202 + list of autofill detail tokens" 203 + | _ :: _ :: _ -> 204 + Error 205 + "A list of autofill details tokens must not contain more than one \ 206 + autofill field name" 207 + in 208 + process_field_tokens !tokens 209 + 210 + (** Validate autocomplete value *) 211 + let validate_autocomplete s = 212 + let trimmed = trim_whitespace s in 213 + if String.length trimmed = 0 then Error "Must not be empty" 214 + else if trimmed = "on" || trimmed = "off" then Ok () 215 + else 216 + let tokens = split_on_whitespace trimmed in 217 + check_tokens tokens 218 + 219 + module Autocomplete = struct 220 + let name = "autocomplete" 221 + let validate = validate_autocomplete 222 + 223 + let is_valid s = 224 + match validate s with 225 + | Ok () -> true 226 + | Error _ -> false 227 + end 228 + 229 + let datatypes = [ (module Autocomplete : Datatype.S) ]
+41
lib/html5_checker/datatype/dt_autocomplete.mli
··· 1 + (** Autocomplete attribute datatype validator. 2 + 3 + This module provides a validator for the autocomplete attribute used on 4 + form fields, as defined by the HTML5 specification. *) 5 + 6 + (** Autocomplete attribute validator. 7 + 8 + Validates autocomplete attribute values which can be: 9 + - "on" or "off" (simple values) 10 + - Autofill detail tokens in the format: 11 + [section-*] [shipping|billing] [contact-type] field-name [webauthn] 12 + 13 + Contact types: home, work, mobile, fax, pager 14 + 15 + Field names include: 16 + - Name fields: name, honorific-prefix, given-name, additional-name, 17 + family-name, honorific-suffix, nickname, organization-title 18 + - Authentication: username, new-password, current-password, one-time-code 19 + - Organization: organization 20 + - Address: street-address, address-line1, address-line2, address-line3, 21 + address-level1, address-level2, address-level3, address-level4, 22 + country, country-name, postal-code 23 + - Credit card: cc-name, cc-given-name, cc-additional-name, cc-family-name, 24 + cc-number, cc-exp, cc-exp-month, cc-exp-year, cc-csc, cc-type 25 + - Transaction: transaction-currency, transaction-amount 26 + - Other: language, bday, bday-day, bday-month, bday-year, sex, url, photo 27 + - Contact: tel, tel-country-code, tel-national, tel-area-code, tel-local, 28 + tel-local-prefix, tel-local-suffix, tel-extension, email, impp 29 + 30 + Examples: 31 + - "on" 32 + - "off" 33 + - "name" 34 + - "email" 35 + - "shipping street-address" 36 + - "section-blue billing email" 37 + - "work tel" *) 38 + module Autocomplete : Datatype.S 39 + 40 + (** List of all datatypes defined in this module *) 41 + val datatypes : Datatype.t list
+38
lib/html5_checker/datatype/dt_boolean.ml
··· 1 + (** Boolean attribute validation for HTML5 *) 2 + module Boolean = struct 3 + let name = "boolean" 4 + 5 + let validate s = 6 + match s with 7 + | "" | "true" | "false" -> Ok () 8 + | _ -> 9 + Error 10 + (Printf.sprintf 11 + "The value '%s' is not a valid boolean. Expected empty string, \ 12 + 'true', or 'false'." 13 + s) 14 + 15 + let is_valid s = Result.is_ok (validate s) 16 + 17 + let with_name attr_name = 18 + let module M = struct 19 + let name = "boolean" 20 + 21 + let validate s = 22 + match s with 23 + | "" | "true" | "false" -> Ok () 24 + | _ -> 25 + let s_lower = Datatype.string_to_ascii_lowercase s in 26 + let attr_lower = Datatype.string_to_ascii_lowercase attr_name in 27 + if s_lower = attr_lower then Ok () 28 + else 29 + Error 30 + (Printf.sprintf 31 + "The value '%s' is not a valid boolean. Expected empty \ 32 + string, 'true', 'false', or '%s'." 33 + s attr_name) 34 + 35 + let is_valid s = Result.is_ok (validate s) 36 + end in 37 + (module M : Datatype.S) 38 + end
+19
lib/html5_checker/datatype/dt_boolean.mli
··· 1 + (** Boolean attribute datatype validator for HTML5 *) 2 + 3 + (** Boolean attribute validation. 4 + 5 + In HTML5, boolean attributes can have the following values: 6 + - Empty string 7 + - "true" 8 + - "false" 9 + - The attribute name itself (case-insensitive) 10 + 11 + For attribute-name validation, use [Boolean.with_name]. *) 12 + module Boolean : sig 13 + include Datatype.S 14 + 15 + (** Create a boolean validator that also accepts a specific attribute name. 16 + For example, [with_name "disabled"] will accept "", "true", "false", or 17 + "disabled" (case-insensitive). *) 18 + val with_name : string -> (module Datatype.S) 19 + end
+22
lib/html5_checker/datatype/dt_button_type.ml
··· 1 + (** Button type attribute validation based on HTML5 spec *) 2 + 3 + (** Valid button type values *) 4 + let valid_types = [ "submit"; "reset"; "button" ] 5 + 6 + module Button_type = struct 7 + let name = "button-type" 8 + 9 + let validate s = 10 + let s_lower = Datatype.string_to_ascii_lowercase s in 11 + if List.mem s_lower valid_types then Ok () 12 + else 13 + Error 14 + (Printf.sprintf 15 + "The value '%s' is not a valid button type. Expected one of: %s." 16 + s 17 + (String.concat ", " valid_types)) 18 + 19 + let is_valid s = Result.is_ok (validate s) 20 + end 21 + 22 + let datatypes = [ (module Button_type : Datatype.S) ]
+22
lib/html5_checker/datatype/dt_button_type.mli
··· 1 + (** Button type attribute datatype validator. 2 + 3 + This module provides a validator for the type attribute used on 4 + button elements, as defined by the HTML5 specification. *) 5 + 6 + (** Button type attribute validator. 7 + 8 + Validates button type attribute values which can be: 9 + - submit - Submit button (submits the form) 10 + - reset - Reset button (resets the form) 11 + - button - Push button (no default behavior) 12 + 13 + Values are matched case-insensitively according to HTML5 spec. 14 + 15 + Examples: 16 + - "submit" 17 + - "reset" 18 + - "button" *) 19 + module Button_type : Datatype.S 20 + 21 + (** List of all datatypes defined in this module *) 22 + val datatypes : Datatype.t list
+136
lib/html5_checker/datatype/dt_charset.ml
··· 1 + (** Helper functions for charset validation *) 2 + 3 + let is_valid_charset_char c = 4 + (c >= '0' && c <= '9') || 5 + (c >= 'a' && c <= 'z') || 6 + (c >= 'A' && c <= 'Z') || 7 + c = '-' || c = '!' || c = '#' || c = '$' || c = '%' || c = '&' || 8 + c = '\'' || c = '+' || c = '_' || c = '`' || c = '{' || c = '}' || 9 + c = '~' || c = '^' 10 + 11 + let to_lower s = String.lowercase_ascii s 12 + 13 + (** Common encoding labels recognized by WHATWG Encoding Standard. 14 + This is a subset of the full list. *) 15 + let known_charsets = [ 16 + (* UTF-8 *) 17 + "utf-8"; "utf8"; "unicode-1-1-utf-8"; 18 + (* Legacy single-byte encodings *) 19 + "iso-8859-1"; "iso8859-1"; "latin1"; "iso-8859-2"; "iso-8859-3"; 20 + "iso-8859-4"; "iso-8859-5"; "iso-8859-6"; "iso-8859-7"; "iso-8859-8"; 21 + "iso-8859-9"; "iso-8859-10"; "iso-8859-13"; "iso-8859-14"; "iso-8859-15"; 22 + "iso-8859-16"; 23 + (* Windows code pages *) 24 + "windows-1250"; "windows-1251"; "windows-1252"; "windows-1253"; 25 + "windows-1254"; "windows-1255"; "windows-1256"; "windows-1257"; 26 + "windows-1258"; 27 + (* Other common encodings *) 28 + "us-ascii"; "ascii"; "utf-16"; "utf-16le"; "utf-16be"; 29 + "gb2312"; "gbk"; "gb18030"; "big5"; "euc-jp"; "iso-2022-jp"; 30 + "shift_jis"; "euc-kr"; "koi8-r"; "koi8-u"; 31 + (* Macintosh encodings *) 32 + "macintosh"; "x-mac-roman"; 33 + ] 34 + 35 + (** Check if a charset name is recognized *) 36 + let is_known_charset name = 37 + let lower = to_lower name in 38 + List.mem lower known_charsets 39 + 40 + module Charset = struct 41 + let name = "encoding name" 42 + 43 + let validate s = 44 + if String.length s = 0 then 45 + Error "The empty string is not a valid character encoding name" 46 + else 47 + (* Check all characters are valid *) 48 + let rec check_chars i = 49 + if i >= String.length s then 50 + Ok () 51 + else 52 + let c = s.[i] in 53 + if not (is_valid_charset_char c) then 54 + Error (Printf.sprintf "Value contained '%c', which is not a valid character in an encoding name" c) 55 + else 56 + check_chars (i + 1) 57 + in 58 + match check_chars 0 with 59 + | Error e -> Error e 60 + | Ok () -> 61 + let lower = to_lower s in 62 + (* Reject "replacement" encoding *) 63 + if lower = "replacement" then 64 + Error (Printf.sprintf "'%s' is not a valid character encoding name" s) 65 + (* Check if it's a known charset *) 66 + else if not (is_known_charset lower) then 67 + Error (Printf.sprintf "'%s' is not a valid character encoding name" s) 68 + else 69 + Ok () 70 + 71 + let is_valid s = Result.is_ok (validate s) 72 + end 73 + 74 + module Meta_charset = struct 75 + let name = "legacy character encoding declaration" 76 + 77 + let is_whitespace c = 78 + c = ' ' || c = '\t' || c = '\n' || c = '\012' || c = '\r' 79 + 80 + let validate s = 81 + let lower = to_lower s in 82 + if not (String.starts_with ~prefix:"text/html;" lower) then 83 + Error "The legacy encoding declaration did not start with 'text/html;'" 84 + else if String.length lower = 10 then 85 + Error "The legacy encoding declaration ended prematurely" 86 + else 87 + (* Skip whitespace after semicolon *) 88 + let rec skip_ws i = 89 + if i >= String.length lower then 90 + Error "The legacy encoding declaration did not contain 'charset=' after the semicolon" 91 + else 92 + let c = lower.[i] in 93 + if is_whitespace c then 94 + skip_ws (i + 1) 95 + else if c = 'c' then 96 + Ok i 97 + else 98 + Error (Printf.sprintf "The legacy encoding declaration did not start with space characters or 'charset=' after the semicolon. Found '%c' instead" c) 99 + in 100 + match skip_ws 10 with 101 + | Error e -> Error e 102 + | Ok offset -> 103 + if not (String.sub lower offset (String.length lower - offset) |> String.starts_with ~prefix:"charset=") then 104 + Error "The legacy encoding declaration did not contain 'charset=' after the semicolon" 105 + else 106 + let charset_offset = offset + 8 in 107 + if charset_offset >= String.length lower then 108 + Error "The empty string is not a valid character encoding name" 109 + else 110 + (* Validate remaining characters *) 111 + let rec check_chars i = 112 + if i >= String.length lower then 113 + Ok () 114 + else 115 + let c = lower.[i] in 116 + if not (is_valid_charset_char c) then 117 + Error (Printf.sprintf "The legacy encoding contained '%c', which is not a valid character in an encoding name" c) 118 + else 119 + check_chars (i + 1) 120 + in 121 + match check_chars charset_offset with 122 + | Error e -> Error e 123 + | Ok () -> 124 + let encoding_name = String.sub lower charset_offset (String.length lower - charset_offset) in 125 + if encoding_name <> "utf-8" then 126 + Error "'charset=' must be followed by 'utf-8'" 127 + else 128 + Ok () 129 + 130 + let is_valid s = Result.is_ok (validate s) 131 + end 132 + 133 + let datatypes = [ 134 + (module Charset : Datatype.S); 135 + (module Meta_charset : Datatype.S); 136 + ]
+37
lib/html5_checker/datatype/dt_charset.mli
··· 1 + (** Character encoding datatype validators for HTML5. 2 + 3 + This module provides validators for character encoding names as used in 4 + HTML5. Encoding names must conform to the WHATWG Encoding Standard. *) 5 + 6 + (** Character encoding name datatype. 7 + 8 + Validates a character encoding name according to the WHATWG Encoding 9 + Standard. Valid encoding names include: 10 + - UTF-8 (and variants like "utf-8", "utf8") 11 + - Legacy encodings (ISO-8859-1, windows-1252, etc.) 12 + 13 + The validator checks: 14 + - Non-empty string 15 + - Valid characters (alphanumeric, hyphen, and special chars: ! # $ % & ' + _ ` \{ \} ~ ^) 16 + - Recognizes common encoding labels 17 + 18 + Note: This is a simplified validator that recognizes common encoding 19 + names but does not include the full WHATWG encoding label table. 20 + It accepts labels case-insensitively. *) 21 + module Charset : Datatype.S 22 + 23 + (** Meta charset datatype for legacy encoding declarations. 24 + 25 + Validates the charset attribute value in legacy meta elements of the form: 26 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> 27 + 28 + The validator checks that: 29 + - String starts with "text/html;" 30 + - After optional whitespace, contains "charset=" 31 + - The charset value is "utf-8" (the only allowed value in modern HTML5) 32 + 33 + Example valid value: "text/html; charset=utf-8" *) 34 + module Meta_charset : Datatype.S 35 + 36 + (** List of all charset datatypes *) 37 + val datatypes : Datatype.t list
+243
lib/html5_checker/datatype/dt_color.ml
··· 1 + (** Color validation *) 2 + 3 + (** Named CSS colors *) 4 + let named_colors = 5 + [ 6 + "aliceblue"; 7 + "antiquewhite"; 8 + "aqua"; 9 + "aquamarine"; 10 + "azure"; 11 + "beige"; 12 + "bisque"; 13 + "black"; 14 + "blanchedalmond"; 15 + "blue"; 16 + "blueviolet"; 17 + "brown"; 18 + "burlywood"; 19 + "cadetblue"; 20 + "chartreuse"; 21 + "chocolate"; 22 + "coral"; 23 + "cornflowerblue"; 24 + "cornsilk"; 25 + "crimson"; 26 + "cyan"; 27 + "darkblue"; 28 + "darkcyan"; 29 + "darkgoldenrod"; 30 + "darkgray"; 31 + "darkgrey"; 32 + "darkgreen"; 33 + "darkkhaki"; 34 + "darkmagenta"; 35 + "darkolivegreen"; 36 + "darkorange"; 37 + "darkorchid"; 38 + "darkred"; 39 + "darksalmon"; 40 + "darkseagreen"; 41 + "darkslateblue"; 42 + "darkslategray"; 43 + "darkslategrey"; 44 + "darkturquoise"; 45 + "darkviolet"; 46 + "deeppink"; 47 + "deepskyblue"; 48 + "dimgray"; 49 + "dimgrey"; 50 + "dodgerblue"; 51 + "firebrick"; 52 + "floralwhite"; 53 + "forestgreen"; 54 + "fuchsia"; 55 + "gainsboro"; 56 + "ghostwhite"; 57 + "gold"; 58 + "goldenrod"; 59 + "gray"; 60 + "grey"; 61 + "green"; 62 + "greenyellow"; 63 + "honeydew"; 64 + "hotpink"; 65 + "indianred"; 66 + "indigo"; 67 + "ivory"; 68 + "khaki"; 69 + "lavender"; 70 + "lavenderblush"; 71 + "lawngreen"; 72 + "lemonchiffon"; 73 + "lightblue"; 74 + "lightcoral"; 75 + "lightcyan"; 76 + "lightgoldenrodyellow"; 77 + "lightgray"; 78 + "lightgrey"; 79 + "lightgreen"; 80 + "lightpink"; 81 + "lightsalmon"; 82 + "lightseagreen"; 83 + "lightskyblue"; 84 + "lightslategray"; 85 + "lightslategrey"; 86 + "lightsteelblue"; 87 + "lightyellow"; 88 + "lime"; 89 + "limegreen"; 90 + "linen"; 91 + "magenta"; 92 + "maroon"; 93 + "mediumaquamarine"; 94 + "mediumblue"; 95 + "mediumorchid"; 96 + "mediumpurple"; 97 + "mediumseagreen"; 98 + "mediumslateblue"; 99 + "mediumspringgreen"; 100 + "mediumturquoise"; 101 + "mediumvioletred"; 102 + "midnightblue"; 103 + "mintcream"; 104 + "mistyrose"; 105 + "moccasin"; 106 + "navajowhite"; 107 + "navy"; 108 + "oldlace"; 109 + "olive"; 110 + "olivedrab"; 111 + "orange"; 112 + "orangered"; 113 + "orchid"; 114 + "palegoldenrod"; 115 + "palegreen"; 116 + "paleturquoise"; 117 + "palevioletred"; 118 + "papayawhip"; 119 + "peachpuff"; 120 + "peru"; 121 + "pink"; 122 + "plum"; 123 + "powderblue"; 124 + "purple"; 125 + "red"; 126 + "rosybrown"; 127 + "royalblue"; 128 + "saddlebrown"; 129 + "salmon"; 130 + "sandybrown"; 131 + "seagreen"; 132 + "seashell"; 133 + "sienna"; 134 + "silver"; 135 + "skyblue"; 136 + "slateblue"; 137 + "slategray"; 138 + "slategrey"; 139 + "snow"; 140 + "springgreen"; 141 + "steelblue"; 142 + "tan"; 143 + "teal"; 144 + "thistle"; 145 + "tomato"; 146 + "transparent"; 147 + "turquoise"; 148 + "violet"; 149 + "wheat"; 150 + "white"; 151 + "whitesmoke"; 152 + "yellow"; 153 + "yellowgreen"; 154 + ] 155 + 156 + (** Check if character is hex digit *) 157 + let is_hex_digit c = 158 + (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') 159 + 160 + (** Validate hex color (#RGB or #RRGGBB) *) 161 + let validate_hex_color s = 162 + let len = String.length s in 163 + if len <> 4 && len <> 7 then 164 + Error "Hex color must be #RGB or #RRGGBB format" 165 + else if s.[0] <> '#' then 166 + Error "Hex color must start with '#'" 167 + else 168 + let rec check_hex i = 169 + if i >= len then Ok () 170 + else if is_hex_digit s.[i] then check_hex (i + 1) 171 + else 172 + Error 173 + (Printf.sprintf "Invalid hex digit '%c' at position %d" s.[i] i) 174 + in 175 + check_hex 1 176 + 177 + (** Simple color validator - strict #RRGGBB format *) 178 + module Simple_color = struct 179 + let name = "simple color" 180 + 181 + let validate s = 182 + let s = String.trim s in 183 + if String.length s <> 7 then 184 + Error "Incorrect length for color string (must be 7 characters)" 185 + else if s.[0] <> '#' then 186 + Error 187 + (Printf.sprintf 188 + "Color starts with incorrect character '%c'. Expected the number \ 189 + sign '#'" 190 + s.[0]) 191 + else 192 + let rec check_hex i = 193 + if i >= 7 then Ok () 194 + else if is_hex_digit s.[i] then check_hex (i + 1) 195 + else 196 + Error 197 + (Printf.sprintf "'%c' is not a valid hexadecimal digit" s.[i]) 198 + in 199 + check_hex 1 200 + 201 + let is_valid s = 202 + match validate s with 203 + | Ok () -> true 204 + | Error _ -> false 205 + end 206 + 207 + (** CSS color validator - supports multiple formats *) 208 + module Color = struct 209 + let name = "color" 210 + 211 + let validate s = 212 + let s = String.trim s |> String.lowercase_ascii in 213 + if String.length s = 0 then Error "Color value must not be empty" 214 + else if List.mem s named_colors then Ok () 215 + else if String.length s > 0 && s.[0] = '#' then validate_hex_color s 216 + else if 217 + String.length s > 4 218 + && (String.sub s 0 4 = "rgb(" || String.sub s 0 5 = "rgba(") 219 + then 220 + (* Basic validation for rgb/rgba - just check balanced parens *) 221 + if s.[String.length s - 1] = ')' then Ok () 222 + else Error "rgb/rgba function must end with ')'" 223 + else if 224 + String.length s > 4 225 + && (String.sub s 0 4 = "hsl(" || String.sub s 0 5 = "hsla(") 226 + then 227 + (* Basic validation for hsl/hsla - just check balanced parens *) 228 + if s.[String.length s - 1] = ')' then Ok () 229 + else Error "hsl/hsla function must end with ')'" 230 + else 231 + Error 232 + (Printf.sprintf 233 + "Unrecognized color format '%s' (expected named color, hex, rgb(), \ 234 + rgba(), hsl(), or hsla())" 235 + s) 236 + 237 + let is_valid s = 238 + match validate s with 239 + | Ok () -> true 240 + | Error _ -> false 241 + end 242 + 243 + let datatypes = [ (module Color : Datatype.S); (module Simple_color : Datatype.S) ]
+29
lib/html5_checker/datatype/dt_color.mli
··· 1 + (** Color value datatype validators. 2 + 3 + This module provides validators for CSS color values and simple colors 4 + as defined by the HTML5 specification. *) 5 + 6 + (** CSS color value validator. 7 + 8 + Validates various CSS color formats: 9 + - Named colors (e.g., "red", "blue", "transparent") 10 + - Hex colors: #RGB or #RRGGBB 11 + - rgb() and rgba() functional notation 12 + - hsl() and hsla() functional notation 13 + 14 + This is a simplified validator and does not validate all edge cases. *) 15 + module Color : Datatype.S 16 + 17 + (** Simple color validator (for input[type=color]). 18 + 19 + Validates the simple color format: 20 + - Must be exactly 7 characters 21 + - Must start with '#' 22 + - Followed by exactly 6 hexadecimal digits (0-9, a-f, A-F) 23 + - Format: #RRGGBB 24 + 25 + This is the strict format required for input[type=color]. *) 26 + module Simple_color : Datatype.S 27 + 28 + (** List of all datatypes defined in this module *) 29 + val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_contenteditable.ml
··· 1 + (** Contenteditable attribute validation for HTML5 *) 2 + 3 + module Contenteditable = struct 4 + let name = "contenteditable" 5 + 6 + let validate s = 7 + let s_lower = Datatype.string_to_ascii_lowercase s in 8 + match s_lower with 9 + | "" | "true" | "false" | "plaintext-only" -> Ok () 10 + | _ -> 11 + Error 12 + (Printf.sprintf 13 + "The value '%s' is not a valid contenteditable value. Expected \ 14 + 'true', 'false', 'plaintext-only', or empty string." 15 + s) 16 + 17 + let is_valid s = Result.is_ok (validate s) 18 + end 19 + 20 + let datatypes = [ (module Contenteditable : Datatype.S) ]
+25
lib/html5_checker/datatype/dt_contenteditable.mli
··· 1 + (** Contenteditable attribute datatype validator for HTML5. 2 + 3 + This module provides a validator for the contenteditable attribute, as 4 + defined by the HTML5 specification. *) 5 + 6 + (** Contenteditable attribute validator. 7 + 8 + Validates contenteditable attribute values which can be: 9 + - "true" - the element is editable 10 + - "false" - the element is not editable 11 + - "" (empty string) - equivalent to "true" 12 + - "plaintext-only" - the element is editable, but rich text formatting is 13 + disabled 14 + 15 + Values are case-insensitive. 16 + 17 + Examples: 18 + - "true" 19 + - "false" 20 + - "" 21 + - "plaintext-only" *) 22 + module Contenteditable : Datatype.S 23 + 24 + (** List of all datatypes defined in this module *) 25 + val datatypes : Datatype.t list
+45
lib/html5_checker/datatype/dt_coords.ml
··· 1 + (** Coordinates attribute validation for HTML5 *) 2 + 3 + module Coords = struct 4 + let name = "coords" 5 + 6 + let validate s = 7 + (* Empty string is valid for default shape *) 8 + if s = "" then Ok () 9 + else 10 + (* Split on comma and validate each part is an integer *) 11 + let parts = 12 + String.split_on_char ',' s 13 + |> List.map (fun p -> 14 + let trimmed = String.trim p in 15 + (* Check if it's a valid integer *) 16 + try 17 + let _ = int_of_string trimmed in 18 + Ok () 19 + with Failure _ -> 20 + Error (Printf.sprintf "The value '%s' is not a valid integer" p)) 21 + in 22 + (* Check if all parts are valid *) 23 + let rec check = function 24 + | [] -> Ok () 25 + | Ok () :: rest -> check rest 26 + | (Error msg) :: _ -> Error msg 27 + in 28 + match check parts with 29 + | Error msg -> 30 + Error 31 + (Printf.sprintf 32 + "The coords value '%s' is not valid. %s. Expected a \ 33 + comma-separated list of integers." 34 + s msg) 35 + | Ok () -> ( 36 + (* Verify we have at least some values *) 37 + let count = List.length parts in 38 + match count with 39 + | 0 -> Error "The coords value must not be empty unless for default shape" 40 + | _ -> Ok ()) 41 + 42 + let is_valid s = Result.is_ok (validate s) 43 + end 44 + 45 + let datatypes = [ (module Coords : Datatype.S) ]
+25
lib/html5_checker/datatype/dt_coords.mli
··· 1 + (** Coordinates attribute datatype validator for HTML5. 2 + 3 + This module provides a validator for the coords attribute used on area 4 + elements within image maps, as defined by the HTML5 specification. *) 5 + 6 + (** Coordinates attribute validator. 7 + 8 + Validates coords attribute values which must be a comma-separated list of 9 + valid integers. The number of values depends on the shape: 10 + - rect: exactly 4 values (x1,y1,x2,y2) 11 + - circle: exactly 3 values (x,y,radius) 12 + - poly: even number of values >= 6 (x1,y1,x2,y2,x3,y3,...) 13 + - default: should be empty or not present 14 + 15 + Note: This validator only checks that the value is a valid comma-separated 16 + list of integers. Shape-specific validation should be done at a higher level. 17 + 18 + Examples: 19 + - "0,0,10,10" (rect) 20 + - "50,50,25" (circle) 21 + - "0,0,50,0,50,50,0,50" (poly) *) 22 + module Coords : Datatype.S 23 + 24 + (** List of all datatypes defined in this module *) 25 + val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_crossorigin.ml
··· 1 + (** CORS crossorigin attribute validation for HTML5 *) 2 + 3 + module Crossorigin = struct 4 + let name = "crossorigin" 5 + 6 + let validate s = 7 + let s_lower = Datatype.string_to_ascii_lowercase s in 8 + match s_lower with 9 + | "" | "anonymous" | "use-credentials" -> Ok () 10 + | _ -> 11 + Error 12 + (Printf.sprintf 13 + "The value '%s' is not a valid crossorigin value. Expected \ 14 + empty string, 'anonymous', or 'use-credentials'." 15 + s) 16 + 17 + let is_valid s = Result.is_ok (validate s) 18 + end 19 + 20 + let datatypes = [ (module Crossorigin : Datatype.S) ]
+23
lib/html5_checker/datatype/dt_crossorigin.mli
··· 1 + (** CORS crossorigin attribute datatype validator. 2 + 3 + This module provides a validator for the crossorigin attribute used on 4 + script, link, img, audio, video elements, as defined by the HTML5 spec. *) 5 + 6 + (** Crossorigin attribute validator. 7 + 8 + Validates crossorigin attribute values which can be: 9 + - "" (empty string, equivalent to anonymous) 10 + - "anonymous" (requests use CORS without credentials) 11 + - "use-credentials" (requests use CORS with credentials) 12 + 13 + Values are case-insensitive after ASCII lowercasing. 14 + 15 + Examples: 16 + - "" 17 + - "anonymous" 18 + - "use-credentials" 19 + - "Anonymous" (equivalent to "anonymous") *) 20 + module Crossorigin : Datatype.S 21 + 22 + (** List of all datatypes defined in this module *) 23 + val datatypes : Datatype.t list
+235
lib/html5_checker/datatype/dt_datetime.ml
··· 1 + (** Helper functions for datetime validation *) 2 + 3 + let is_digit c = c >= '0' && c <= '9' 4 + 5 + let is_all_digits s = 6 + String.for_all is_digit s 7 + 8 + let parse_int s = 9 + try Some (int_of_string s) 10 + with Failure _ -> None 11 + 12 + (** Days in each month (non-leap year) *) 13 + let days_in_month = [| 31; 28; 31; 30; 31; 30; 31; 31; 30; 31; 30; 31 |] 14 + 15 + (** Check if a year is a leap year *) 16 + let is_leap_year year = 17 + (year mod 400 = 0) || (year mod 4 = 0 && year mod 100 <> 0) 18 + 19 + (** Get maximum day for a given month/year *) 20 + let max_day_for_month year month = 21 + if month = 2 && is_leap_year year then 29 22 + else days_in_month.(month - 1) 23 + 24 + (** Years in the 400-year cycle that have 53 weeks *) 25 + let special_years_mod_400 = [| 26 + 4; 9; 15; 20; 26; 32; 37; 43; 48; 54; 60; 65; 71; 76; 82; 88; 93; 99; 27 + 105; 111; 116; 122; 128; 133; 139; 144; 150; 156; 161; 167; 172; 178; 28 + 184; 189; 195; 201; 207; 212; 218; 224; 229; 235; 240; 246; 252; 257; 29 + 263; 268; 274; 280; 285; 291; 296; 303; 308; 314; 320; 325; 331; 336; 30 + 342; 348; 353; 359; 364; 370; 376; 381; 387; 392; 398 31 + |] 32 + 33 + (** Check if a year has 53 weeks *) 34 + let has_53_weeks year = 35 + let year_mod = year mod 400 in 36 + Array.exists (fun y -> y = year_mod) special_years_mod_400 37 + 38 + module Year = struct 39 + let name = "year" 40 + 41 + let validate s = 42 + let len = String.length s in 43 + if len < 4 then 44 + Error "Year must be at least 4 digits" 45 + else if not (is_all_digits s) then 46 + Error "Year must contain only digits" 47 + else 48 + match parse_int s with 49 + | None -> Error "Year value out of range" 50 + | Some year -> 51 + if year < 1 then 52 + Error "Year cannot be less than 1" 53 + else 54 + Ok () 55 + 56 + let is_valid s = Result.is_ok (validate s) 57 + end 58 + 59 + module Month = struct 60 + let name = "month" 61 + 62 + let validate s = 63 + if String.length s < 7 then 64 + Error "Month must be in YYYY-MM format" 65 + else 66 + let pattern = Str.regexp "^\\([0-9]+\\)-\\([0-9][0-9]\\)$" in 67 + if not (Str.string_match pattern s 0) then 68 + Error "Month must be in YYYY-MM format" 69 + else 70 + let year_s = Str.matched_group 1 s in 71 + let month_s = Str.matched_group 2 s in 72 + match (parse_int year_s, parse_int month_s) with 73 + | None, _ | _, None -> Error "Year or month out of range" 74 + | Some year, Some month -> 75 + if year < 1 then 76 + Error "Year cannot be less than 1" 77 + else if month < 1 then 78 + Error "Month cannot be less than 1" 79 + else if month > 12 then 80 + Error "Month cannot be greater than 12" 81 + else 82 + Ok () 83 + 84 + let is_valid s = Result.is_ok (validate s) 85 + end 86 + 87 + module Week = struct 88 + let name = "week" 89 + 90 + let validate s = 91 + let pattern = Str.regexp "^\\([0-9]+\\)-W\\([0-9][0-9]\\)$" in 92 + if not (Str.string_match pattern s 0) then 93 + Error "Week must be in YYYY-Www format" 94 + else 95 + let year_s = Str.matched_group 1 s in 96 + let week_s = Str.matched_group 2 s in 97 + match (parse_int year_s, parse_int week_s) with 98 + | None, _ | _, None -> Error "Year or week out of range" 99 + | Some year, Some week -> 100 + if year < 1 then 101 + Error "Year cannot be less than 1" 102 + else if week < 1 then 103 + Error "Week cannot be less than 1" 104 + else if week > 53 then 105 + Error "Week out of range" 106 + else if week = 53 && not (has_53_weeks year) then 107 + Error "Week out of range" 108 + else 109 + Ok () 110 + 111 + let is_valid s = Result.is_ok (validate s) 112 + end 113 + 114 + module Date = struct 115 + let name = "date" 116 + 117 + let validate s = 118 + let pattern = Str.regexp "^\\([0-9]+\\)-\\([0-9][0-9]\\)-\\([0-9][0-9]\\)$" in 119 + if not (Str.string_match pattern s 0) then 120 + Error "Date must be in YYYY-MM-DD format" 121 + else 122 + let year_s = Str.matched_group 1 s in 123 + let month_s = Str.matched_group 2 s in 124 + let day_s = Str.matched_group 3 s in 125 + if String.length year_s < 4 then 126 + Error "Year must be at least 4 digits" 127 + else 128 + match (parse_int year_s, parse_int month_s, parse_int day_s) with 129 + | None, _, _ | _, None, _ | _, _, None -> 130 + Error "Year, month, or day out of range" 131 + | Some year, Some month, Some day -> 132 + if year < 1 then 133 + Error "Year cannot be less than 1" 134 + else if month < 1 then 135 + Error "Month cannot be less than 1" 136 + else if month > 12 then 137 + Error "Month cannot be greater than 12" 138 + else if day < 1 then 139 + Error "Day cannot be less than 1" 140 + else 141 + let max_day = max_day_for_month year month in 142 + if day > max_day then 143 + Error "Day out of range" 144 + else 145 + Ok () 146 + 147 + let is_valid s = Result.is_ok (validate s) 148 + end 149 + 150 + module Time = struct 151 + let name = "time" 152 + 153 + let validate s = 154 + let pattern = Str.regexp "^\\([0-9][0-9]\\):\\([0-9][0-9]\\)\\(:\\([0-9][0-9]\\)\\(\\.\\([0-9]+\\)\\)?\\)?$" in 155 + if not (Str.string_match pattern s 0) then 156 + Error "Time must be in HH:MM[:SS[.sss]] format" 157 + else 158 + let hour_s = Str.matched_group 1 s in 159 + let minute_s = Str.matched_group 2 s in 160 + let second_s = try Some (Str.matched_group 4 s) with Not_found -> None in 161 + let millis_s = try Some (Str.matched_group 6 s) with Not_found -> None in 162 + match (parse_int hour_s, parse_int minute_s) with 163 + | None, _ | _, None -> Error "Hour or minute out of range" 164 + | Some hour, Some minute -> 165 + if hour > 23 then 166 + Error "Hour cannot be greater than 23" 167 + else if minute > 59 then 168 + Error "Minute cannot be greater than 59" 169 + else 170 + match second_s with 171 + | None -> Ok () 172 + | Some sec_s -> 173 + match parse_int sec_s with 174 + | None -> Error "Seconds out of range" 175 + | Some second -> 176 + if second > 59 then 177 + Error "Second cannot be greater than 59" 178 + else 179 + match millis_s with 180 + | None -> Ok () 181 + | Some ms -> 182 + if String.length ms > 3 then 183 + Error "A fraction of a second must be one, two, or three digits" 184 + else if not (is_all_digits ms) then 185 + Error "Invalid milliseconds" 186 + else 187 + Ok () 188 + 189 + let is_valid s = Result.is_ok (validate s) 190 + end 191 + 192 + module Datetime_local = struct 193 + let name = "local datetime" 194 + 195 + let validate s = 196 + let pattern = Str.regexp "^\\(.+\\)[T ]\\(.+\\)$" in 197 + if not (Str.string_match pattern s 0) then 198 + Error "Datetime must be in YYYY-MM-DD[T ]HH:MM[:SS[.sss]] format" 199 + else 200 + let date_s = Str.matched_group 1 s in 201 + let time_s = Str.matched_group 2 s in 202 + match Date.validate date_s with 203 + | Error e -> Error ("Invalid date: " ^ e) 204 + | Ok () -> 205 + match Time.validate time_s with 206 + | Error e -> Error ("Invalid time: " ^ e) 207 + | Ok () -> Ok () 208 + 209 + let is_valid s = Result.is_ok (validate s) 210 + end 211 + 212 + module Datetime = struct 213 + let name = "datetime" 214 + 215 + let validate s = 216 + if not (String.ends_with ~suffix:"Z" s) then 217 + Error "Global datetime must end with 'Z'" 218 + else 219 + let s_without_z = String.sub s 0 (String.length s - 1) in 220 + match Datetime_local.validate s_without_z with 221 + | Error e -> Error e 222 + | Ok () -> Ok () 223 + 224 + let is_valid s = Result.is_ok (validate s) 225 + end 226 + 227 + let datatypes = [ 228 + (module Year : Datatype.S); 229 + (module Month : Datatype.S); 230 + (module Week : Datatype.S); 231 + (module Date : Datatype.S); 232 + (module Time : Datatype.S); 233 + (module Datetime_local : Datatype.S); 234 + (module Datetime : Datatype.S); 235 + ]
+74
lib/html5_checker/datatype/dt_datetime.mli
··· 1 + (** Date and time datatype validators for HTML5. 2 + 3 + This module provides validators for various HTML5 date and time formats 4 + as specified in the HTML5 standard. Each validator checks that strings 5 + conform to the specific format and contain valid values. *) 6 + 7 + (** Year datatype (YYYY format, minimum 1). 8 + 9 + Validates a year string consisting of 4 or more ASCII digits. 10 + The year must be at least 1. *) 11 + module Year : Datatype.S 12 + 13 + (** Month datatype (YYYY-MM format). 14 + 15 + Validates a month string in the format YYYY-MM where: 16 + - YYYY is a valid year (>= 1) 17 + - MM is a month from 01 to 12 *) 18 + module Month : Datatype.S 19 + 20 + (** Week datatype (YYYY-Www format). 21 + 22 + Validates a week string in the format YYYY-Www where: 23 + - YYYY is a valid year (>= 1) 24 + - W is the literal character 'W' 25 + - ww is a week number from 01 to 53 26 + 27 + Week 53 is only valid for years that have 53 weeks in the ISO 8601 28 + week-numbering calendar. *) 29 + module Week : Datatype.S 30 + 31 + (** Date datatype (YYYY-MM-DD format). 32 + 33 + Validates a date string in the format YYYY-MM-DD where: 34 + - YYYY is a valid year (>= 1) 35 + - MM is a month from 01 to 12 36 + - DD is a day valid for the given month/year 37 + 38 + Handles leap years correctly. *) 39 + module Date : Datatype.S 40 + 41 + (** Time datatype (HH:MM[:SS[.sss]] format). 42 + 43 + Validates a time string where: 44 + - HH is hours from 00 to 23 45 + - MM is minutes from 00 to 59 46 + - SS (optional) is seconds from 00 to 59 47 + - sss (optional) is milliseconds (1-3 digits) 48 + 49 + Valid formats: 50 + - HH:MM 51 + - HH:MM:SS 52 + - HH:MM:SS.s 53 + - HH:MM:SS.ss 54 + - HH:MM:SS.sss *) 55 + module Time : Datatype.S 56 + 57 + (** Local datetime datatype (YYYY-MM-DD[T ]HH:MM[:SS[.sss]] format). 58 + 59 + Validates a local datetime string combining date and time with 'T' or 60 + space separator. The date must be valid and the time must be valid. 61 + 62 + This format does not include timezone information. *) 63 + module Datetime_local : Datatype.S 64 + 65 + (** Global datetime datatype (YYYY-MM-DD[T ]HH:MM[:SS[.sss]]Z format). 66 + 67 + Validates a global datetime string in UTC (ending with 'Z'). 68 + The date must be valid and the time must be valid. 69 + 70 + This is the format for datetime values that include timezone (UTC). *) 71 + module Datetime : Datatype.S 72 + 73 + (** List of all datetime datatypes *) 74 + val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_decoding.ml
··· 1 + (** Image decoding attribute validation for HTML5 *) 2 + 3 + module Decoding = struct 4 + let name = "decoding" 5 + 6 + let validate s = 7 + let s_lower = Datatype.string_to_ascii_lowercase s in 8 + match s_lower with 9 + | "" | "sync" | "async" | "auto" -> Ok () 10 + | _ -> 11 + Error 12 + (Printf.sprintf 13 + "The value '%s' is not a valid decoding value. Expected empty \ 14 + string, 'sync', 'async', or 'auto'." 15 + s) 16 + 17 + let is_valid s = Result.is_ok (validate s) 18 + end 19 + 20 + let datatypes = [ (module Decoding : Datatype.S) ]
+24
lib/html5_checker/datatype/dt_decoding.mli
··· 1 + (** Image decoding attribute datatype validator. 2 + 3 + This module provides a validator for the decoding attribute used to provide 4 + a hint for image decoding, as defined by the HTML5 spec. *) 5 + 6 + (** Decoding attribute validator. 7 + 8 + Validates decoding attribute values which can be: 9 + - "" (empty string, default decoding behavior) 10 + - "sync" (decode synchronously for atomic presentation) 11 + - "async" (decode asynchronously to avoid delaying other content) 12 + - "auto" (no preference for decoding mode) 13 + 14 + Values are case-insensitive after ASCII lowercasing. 15 + 16 + Examples: 17 + - "" 18 + - "sync" 19 + - "async" 20 + - "auto" *) 21 + module Decoding : Datatype.S 22 + 23 + (** List of all datatypes defined in this module *) 24 + val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_dir.ml
··· 1 + (** Text direction attribute validation for HTML5 *) 2 + 3 + module Dir = struct 4 + let name = "dir" 5 + 6 + let validate s = 7 + let s_lower = Datatype.string_to_ascii_lowercase s in 8 + match s_lower with 9 + | "" | "ltr" | "rtl" | "auto" -> Ok () 10 + | _ -> 11 + Error 12 + (Printf.sprintf 13 + "The value '%s' is not a valid dir value. Expected empty \ 14 + string, 'ltr', 'rtl', or 'auto'." 15 + s) 16 + 17 + let is_valid s = Result.is_ok (validate s) 18 + end 19 + 20 + let datatypes = [ (module Dir : Datatype.S) ]
+24
lib/html5_checker/datatype/dt_dir.mli
··· 1 + (** Text direction attribute datatype validator. 2 + 3 + This module provides a validator for the dir attribute used to specify 4 + the text directionality of element content, as defined by the HTML5 spec. *) 5 + 6 + (** Dir attribute validator. 7 + 8 + Validates dir attribute values which can be: 9 + - "" (empty string, inherits directionality) 10 + - "ltr" (left-to-right text direction) 11 + - "rtl" (right-to-left text direction) 12 + - "auto" (directionality determined from content) 13 + 14 + Values are case-insensitive after ASCII lowercasing. 15 + 16 + Examples: 17 + - "" 18 + - "ltr" 19 + - "rtl" 20 + - "auto" *) 21 + module Dir : Datatype.S 22 + 23 + (** List of all datatypes defined in this module *) 24 + val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_draggable.ml
··· 1 + (** Draggable attribute validation for HTML5 *) 2 + 3 + module Draggable = struct 4 + let name = "draggable" 5 + 6 + let validate s = 7 + let s_lower = Datatype.string_to_ascii_lowercase s in 8 + match s_lower with 9 + | "true" | "false" -> Ok () 10 + | _ -> 11 + Error 12 + (Printf.sprintf 13 + "The value '%s' is not a valid draggable value. Expected 'true' \ 14 + or 'false'." 15 + s) 16 + 17 + let is_valid s = Result.is_ok (validate s) 18 + end 19 + 20 + let datatypes = [ (module Draggable : Datatype.S) ]
+20
lib/html5_checker/datatype/dt_draggable.mli
··· 1 + (** Draggable attribute datatype validator for HTML5. 2 + 3 + This module provides a validator for the draggable attribute, as defined by 4 + the HTML5 specification. *) 5 + 6 + (** Draggable attribute validator. 7 + 8 + Validates draggable attribute values which can be: 9 + - "true" - the element is draggable 10 + - "false" - the element is not draggable 11 + 12 + Values are case-insensitive. 13 + 14 + Examples: 15 + - "true" 16 + - "false" *) 17 + module Draggable : Datatype.S 18 + 19 + (** List of all datatypes defined in this module *) 20 + val datatypes : Datatype.t list
+88
lib/html5_checker/datatype/dt_email.ml
··· 1 + (** Email address validation *) 2 + 3 + (** Helper to check if a character is valid in email local/domain parts *) 4 + let is_email_char c = 5 + (c >= 'a' && c <= 'z') 6 + || (c >= 'A' && c <= 'Z') 7 + || (c >= '0' && c <= '9') 8 + || c = '.' || c = '-' || c = '_' || c = '+' || c = '=' 9 + 10 + (** Validate a single email address using simplified rules *) 11 + let validate_email s = 12 + let s = String.trim s in 13 + if String.length s = 0 then Error "Email address must not be empty" 14 + else 15 + (* Check for exactly one @ symbol *) 16 + let at_count = ref 0 in 17 + let at_pos = ref (-1) in 18 + String.iteri 19 + (fun i c -> if c = '@' then ( 20 + incr at_count; 21 + at_pos := i 22 + )) 23 + s; 24 + if !at_count = 0 then Error "Email address must contain an '@' character" 25 + else if !at_count > 1 then 26 + Error "Email address must contain exactly one '@' character" 27 + else 28 + let local = String.sub s 0 !at_pos in 29 + let domain = String.sub s (!at_pos + 1) (String.length s - !at_pos - 1) in 30 + 31 + (* Validate local part *) 32 + if String.length local = 0 then 33 + Error "Email address must have a local part before '@'" 34 + else if local.[0] = '.' || local.[String.length local - 1] = '.' then 35 + Error "Email local part must not start or end with '.'" 36 + else if not (String.for_all is_email_char local) then 37 + Error "Email local part contains invalid characters" 38 + else (* Validate domain part *) 39 + if String.length domain = 0 then 40 + Error "Email address must have a domain part after '@'" 41 + else if not (String.contains domain '.') then 42 + Error "Email domain must contain at least one '.'" 43 + else if domain.[0] = '.' || domain.[String.length domain - 1] = '.' then 44 + Error "Email domain must not start or end with '.'" 45 + else if 46 + not 47 + (String.for_all 48 + (fun c -> is_email_char c || c = '.') 49 + domain) 50 + then Error "Email domain contains invalid characters" 51 + else Ok () 52 + 53 + module Email = struct 54 + let name = "email address" 55 + let validate = validate_email 56 + 57 + let is_valid s = 58 + match validate s with 59 + | Ok () -> true 60 + | Error _ -> false 61 + end 62 + 63 + module Email_list = struct 64 + let name = "email address list" 65 + 66 + let validate s = 67 + let s = String.trim s in 68 + if String.length s = 0 then Error "Email list must not be empty" 69 + else 70 + (* Split on commas and validate each email *) 71 + let emails = String.split_on_char ',' s in 72 + let rec check_all = function 73 + | [] -> Ok () 74 + | email :: rest -> ( 75 + match validate_email email with 76 + | Ok () -> check_all rest 77 + | Error msg -> 78 + Error (Printf.sprintf "Invalid email in list: %s" msg)) 79 + in 80 + check_all emails 81 + 82 + let is_valid s = 83 + match validate s with 84 + | Ok () -> true 85 + | Error _ -> false 86 + end 87 + 88 + let datatypes = [ (module Email : Datatype.S); (module Email_list : Datatype.S) ]
+23
lib/html5_checker/datatype/dt_email.mli
··· 1 + (** Email address datatype validators. 2 + 3 + This module provides validators for email addresses and email address lists 4 + as defined by the HTML5 specification. *) 5 + 6 + (** Valid email address validator. 7 + 8 + Validates a single email address. Uses simplified validation rules: 9 + - Must contain exactly one '@' character 10 + - Local part (before @) must be non-empty 11 + - Domain part (after @) must be non-empty and contain at least one '.' 12 + - Only ASCII characters allowed *) 13 + module Email : Datatype.S 14 + 15 + (** Comma-separated email address list validator. 16 + 17 + Validates a comma-separated list of email addresses. 18 + Each address in the list must be valid according to {!Email} rules. 19 + Whitespace around commas is ignored. *) 20 + module Email_list : Datatype.S 21 + 22 + (** List of all datatypes defined in this module *) 23 + val datatypes : Datatype.t list
+22
lib/html5_checker/datatype/dt_enterkeyhint.ml
··· 1 + (** Enter key hint attribute validation for HTML5 *) 2 + 3 + module Enterkeyhint = struct 4 + let name = "enterkeyhint" 5 + 6 + let validate s = 7 + let s_lower = Datatype.string_to_ascii_lowercase s in 8 + match s_lower with 9 + | "" | "enter" | "done" | "go" | "next" | "previous" | "search" | "send" -> 10 + Ok () 11 + | _ -> 12 + Error 13 + (Printf.sprintf 14 + "The value '%s' is not a valid enterkeyhint value. Expected \ 15 + one of: empty string, 'enter', 'done', 'go', 'next', \ 16 + 'previous', 'search', or 'send'." 17 + s) 18 + 19 + let is_valid s = Result.is_ok (validate s) 20 + end 21 + 22 + let datatypes = [ (module Enterkeyhint : Datatype.S) ]
+28
lib/html5_checker/datatype/dt_enterkeyhint.mli
··· 1 + (** Enter key hint attribute datatype validator. 2 + 3 + This module provides a validator for the enterkeyhint attribute used to 4 + customize the enter key label on virtual keyboards, as defined by the HTML5 spec. *) 5 + 6 + (** Enterkeyhint attribute validator. 7 + 8 + Validates enterkeyhint attribute values which can be: 9 + - "" (empty string, default enter key) 10 + - "enter" (insert new line) 11 + - "done" (close input method editor) 12 + - "go" (navigate to target) 13 + - "next" (advance to next field) 14 + - "previous" (go back to previous field) 15 + - "search" (perform search) 16 + - "send" (submit or deliver) 17 + 18 + Values are case-insensitive after ASCII lowercasing. 19 + 20 + Examples: 21 + - "" 22 + - "done" 23 + - "next" 24 + - "search" *) 25 + module Enterkeyhint : Datatype.S 26 + 27 + (** List of all datatypes defined in this module *) 28 + val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_fetchpriority.ml
··· 1 + (** Fetch priority attribute validation for HTML5 *) 2 + 3 + module Fetchpriority = struct 4 + let name = "fetchpriority" 5 + 6 + let validate s = 7 + let s_lower = Datatype.string_to_ascii_lowercase s in 8 + match s_lower with 9 + | "" | "high" | "low" | "auto" -> Ok () 10 + | _ -> 11 + Error 12 + (Printf.sprintf 13 + "The value '%s' is not a valid fetchpriority value. Expected \ 14 + empty string, 'high', 'low', or 'auto'." 15 + s) 16 + 17 + let is_valid s = Result.is_ok (validate s) 18 + end 19 + 20 + let datatypes = [ (module Fetchpriority : Datatype.S) ]
+24
lib/html5_checker/datatype/dt_fetchpriority.mli
··· 1 + (** Fetch priority attribute datatype validator. 2 + 3 + This module provides a validator for the fetchpriority attribute used to 4 + provide a hint for resource fetch priority, as defined by the HTML5 spec. *) 5 + 6 + (** Fetchpriority attribute validator. 7 + 8 + Validates fetchpriority attribute values which can be: 9 + - "" (empty string, default fetch priority) 10 + - "high" (fetch at high priority relative to other resources) 11 + - "low" (fetch at low priority relative to other resources) 12 + - "auto" (no preference for fetch priority) 13 + 14 + Values are case-insensitive after ASCII lowercasing. 15 + 16 + Examples: 17 + - "" 18 + - "high" 19 + - "low" 20 + - "auto" *) 21 + module Fetchpriority : Datatype.S 22 + 23 + (** List of all datatypes defined in this module *) 24 + val datatypes : Datatype.t list
+254
lib/html5_checker/datatype/dt_float.ml
··· 1 + (** Valid HTML5 floating point number *) 2 + module Float_ = struct 3 + let name = "floating point number" 4 + 5 + type state = 6 + | At_start 7 + | At_start_minus_seen 8 + | In_integer_part_digits_seen 9 + | Dot_seen 10 + | E_seen 11 + | In_decimal_part_digits_seen 12 + | In_exponent_sign_seen 13 + | In_exponent_digits_seen 14 + 15 + let validate s = 16 + let len = String.length s in 17 + let rec parse i state = 18 + if i >= len then 19 + match state with 20 + | In_integer_part_digits_seen | In_decimal_part_digits_seen 21 + | In_exponent_digits_seen -> 22 + Ok () 23 + | At_start -> Error "The empty string is not a valid floating point number." 24 + | At_start_minus_seen -> 25 + Error "The minus sign alone is not a valid floating point number." 26 + | Dot_seen -> 27 + Error "A floating point number must not end with the decimal point." 28 + | E_seen -> 29 + Error "A floating point number must not end with the exponent 'e'." 30 + | In_exponent_sign_seen -> 31 + Error 32 + "A floating point number must not end with only a sign in the \ 33 + exponent." 34 + else 35 + let c = s.[i] in 36 + match state with 37 + | At_start -> 38 + if c = '-' then parse (i + 1) At_start_minus_seen 39 + else if c = '.' then parse (i + 1) Dot_seen 40 + else if Datatype.is_ascii_digit c then 41 + parse (i + 1) In_integer_part_digits_seen 42 + else 43 + Error 44 + (Printf.sprintf 45 + "Expected a minus sign or a digit but saw '%c' instead." c) 46 + | At_start_minus_seen -> 47 + if Datatype.is_ascii_digit c then 48 + parse (i + 1) In_integer_part_digits_seen 49 + else Error (Printf.sprintf "Expected a digit but saw '%c' instead." c) 50 + | In_integer_part_digits_seen -> 51 + if c = '.' then parse (i + 1) Dot_seen 52 + else if c = 'e' || c = 'E' then parse (i + 1) E_seen 53 + else if Datatype.is_ascii_digit c then 54 + parse (i + 1) In_integer_part_digits_seen 55 + else 56 + Error 57 + (Printf.sprintf 58 + "Expected a decimal point, 'e', 'E' or a digit but saw '%c' \ 59 + instead." 60 + c) 61 + | Dot_seen -> 62 + if Datatype.is_ascii_digit c then 63 + parse (i + 1) In_decimal_part_digits_seen 64 + else 65 + Error 66 + (Printf.sprintf 67 + "Expected a digit after the decimal point but saw '%c' instead." 68 + c) 69 + | In_decimal_part_digits_seen -> 70 + if Datatype.is_ascii_digit c then 71 + parse (i + 1) In_decimal_part_digits_seen 72 + else if c = 'e' || c = 'E' then parse (i + 1) E_seen 73 + else 74 + Error 75 + (Printf.sprintf "Expected 'e', 'E' or a digit but saw '%c' instead." 76 + c) 77 + | E_seen -> 78 + if c = '-' || c = '+' then parse (i + 1) In_exponent_sign_seen 79 + else if Datatype.is_ascii_digit c then 80 + parse (i + 1) In_exponent_digits_seen 81 + else 82 + Error 83 + (Printf.sprintf 84 + "Expected a minus sign, a plus sign or a digit but saw '%c' \ 85 + instead." 86 + c) 87 + | In_exponent_sign_seen -> 88 + if Datatype.is_ascii_digit c then 89 + parse (i + 1) In_exponent_digits_seen 90 + else Error (Printf.sprintf "Expected a digit but saw '%c' instead." c) 91 + | In_exponent_digits_seen -> 92 + if Datatype.is_ascii_digit c then 93 + parse (i + 1) In_exponent_digits_seen 94 + else Error (Printf.sprintf "Expected a digit but saw '%c' instead." c) 95 + in 96 + parse 0 At_start 97 + 98 + let is_valid s = Result.is_ok (validate s) 99 + end 100 + 101 + (** Non-negative floating point number (>= 0) *) 102 + module Float_non_negative = struct 103 + let name = "non-negative floating point number" 104 + 105 + type state = 106 + | At_start 107 + | At_start_minus_seen 108 + | In_integer_part_digits_seen 109 + | In_integer_part_digits_seen_zero 110 + | Dot_seen 111 + | Dot_seen_zero 112 + | E_seen 113 + | In_decimal_part_digits_seen 114 + | In_decimal_part_digits_seen_zero 115 + | In_exponent_sign_seen 116 + | In_exponent_digits_seen 117 + 118 + let validate s = 119 + let len = String.length s in 120 + let rec parse i state = 121 + if i >= len then 122 + match state with 123 + | In_integer_part_digits_seen | In_decimal_part_digits_seen 124 + | In_integer_part_digits_seen_zero | In_decimal_part_digits_seen_zero 125 + | In_exponent_digits_seen -> 126 + Ok () 127 + | At_start -> 128 + Error "The empty string is not a valid non-negative floating point number." 129 + | At_start_minus_seen -> 130 + Error 131 + "The minus sign alone is not a valid non-negative floating point \ 132 + number." 133 + | Dot_seen | Dot_seen_zero -> 134 + Error 135 + "A non-negative floating point number must not end with the \ 136 + decimal point." 137 + | E_seen -> 138 + Error 139 + "A non-negative floating point number must not end with the \ 140 + exponent 'e'." 141 + | In_exponent_sign_seen -> 142 + Error 143 + "A non-negative floating point number must not end with only a \ 144 + sign in the exponent." 145 + else 146 + let c = s.[i] in 147 + match state with 148 + | At_start -> 149 + if c = '-' then parse (i + 1) At_start_minus_seen 150 + else if c = '.' then parse (i + 1) Dot_seen 151 + else if Datatype.is_ascii_digit c then 152 + parse (i + 1) In_integer_part_digits_seen 153 + else 154 + Error 155 + (Printf.sprintf 156 + "Expected a minus sign or a digit but saw '%c' instead." c) 157 + | At_start_minus_seen -> 158 + if c = '0' then parse (i + 1) In_integer_part_digits_seen_zero 159 + else Error (Printf.sprintf "Expected a zero but saw '%c' instead." c) 160 + | In_integer_part_digits_seen -> 161 + if c = '.' then parse (i + 1) Dot_seen 162 + else if c = 'e' || c = 'E' then parse (i + 1) E_seen 163 + else if Datatype.is_ascii_digit c then 164 + parse (i + 1) In_integer_part_digits_seen 165 + else 166 + Error 167 + (Printf.sprintf 168 + "Expected a decimal point, 'e', 'E' or a digit but saw '%c' \ 169 + instead." 170 + c) 171 + | In_integer_part_digits_seen_zero -> 172 + if c = '.' then parse (i + 1) Dot_seen_zero 173 + else if c = 'e' || c = 'E' then parse (i + 1) E_seen 174 + else if c = '0' then parse (i + 1) In_integer_part_digits_seen_zero 175 + else 176 + Error 177 + (Printf.sprintf 178 + "Expected a decimal point, 'e', 'E' or a zero but saw '%c' \ 179 + instead." 180 + c) 181 + | Dot_seen -> 182 + if Datatype.is_ascii_digit c then 183 + parse (i + 1) In_decimal_part_digits_seen 184 + else 185 + Error 186 + (Printf.sprintf 187 + "Expected a digit after the decimal point but saw '%c' instead." 188 + c) 189 + | Dot_seen_zero -> 190 + if c = '0' then parse (i + 1) In_decimal_part_digits_seen_zero 191 + else 192 + Error 193 + (Printf.sprintf 194 + "Expected a zero after the decimal point but saw '%c' instead." 195 + c) 196 + | In_decimal_part_digits_seen -> 197 + if Datatype.is_ascii_digit c then 198 + parse (i + 1) In_decimal_part_digits_seen 199 + else if c = 'e' || c = 'E' then parse (i + 1) E_seen 200 + else 201 + Error 202 + (Printf.sprintf "Expected 'e', 'E' or a digit but saw '%c' instead." 203 + c) 204 + | In_decimal_part_digits_seen_zero -> 205 + if c = '0' then parse (i + 1) In_decimal_part_digits_seen_zero 206 + else if c = 'e' || c = 'E' then parse (i + 1) E_seen 207 + else 208 + Error 209 + (Printf.sprintf "Expected 'e', 'E' or a zero but saw '%c' instead." 210 + c) 211 + | E_seen -> 212 + if c = '-' || c = '+' then parse (i + 1) In_exponent_sign_seen 213 + else if Datatype.is_ascii_digit c then 214 + parse (i + 1) In_exponent_digits_seen 215 + else 216 + Error 217 + (Printf.sprintf 218 + "Expected a minus sign, a plus sign or a digit but saw '%c' \ 219 + instead." 220 + c) 221 + | In_exponent_sign_seen -> 222 + if Datatype.is_ascii_digit c then 223 + parse (i + 1) In_exponent_digits_seen 224 + else Error (Printf.sprintf "Expected a digit but saw '%c' instead." c) 225 + | In_exponent_digits_seen -> 226 + if Datatype.is_ascii_digit c then 227 + parse (i + 1) In_exponent_digits_seen 228 + else Error (Printf.sprintf "Expected a digit but saw '%c' instead." c) 229 + in 230 + parse 0 At_start 231 + 232 + let is_valid s = Result.is_ok (validate s) 233 + end 234 + 235 + (** Positive floating point number (> 0) *) 236 + module Float_positive = struct 237 + let name = "positive floating point number" 238 + 239 + (* For positive floats, we validate it's a valid non-negative float, 240 + then check it's not zero *) 241 + let validate s = 242 + match Float_non_negative.validate s with 243 + | Error _ as e -> e 244 + | Ok () -> ( 245 + (* Parse as float and check if it's positive *) 246 + try 247 + let f = float_of_string s in 248 + if f > 0.0 then Ok () 249 + else Error "The value must be a positive floating point number." 250 + with Failure _ -> 251 + Error "Invalid floating point number format.") 252 + 253 + let is_valid s = Result.is_ok (validate s) 254 + end
+16
lib/html5_checker/datatype/dt_float.mli
··· 1 + (** Floating point datatype validators for HTML5 *) 2 + 3 + (** Valid HTML5 floating point number *) 4 + module Float_ : sig 5 + include Datatype.S 6 + end 7 + 8 + (** Non-negative floating point number (>= 0) *) 9 + module Float_non_negative : sig 10 + include Datatype.S 11 + end 12 + 13 + (** Positive floating point number (> 0) *) 14 + module Float_positive : sig 15 + include Datatype.S 16 + end
+28
lib/html5_checker/datatype/dt_form_enctype.ml
··· 1 + (** Form encoding type attribute validation based on HTML5 spec *) 2 + 3 + (** Valid form enctype values *) 4 + let valid_enctypes = 5 + [ 6 + "application/x-www-form-urlencoded"; 7 + "multipart/form-data"; 8 + "text/plain"; 9 + ] 10 + 11 + module Form_enctype = struct 12 + let name = "form-enctype" 13 + 14 + let validate s = 15 + let s_lower = Datatype.string_to_ascii_lowercase s in 16 + if List.mem s_lower valid_enctypes then Ok () 17 + else 18 + Error 19 + (Printf.sprintf 20 + "The value '%s' is not a valid form encoding type. Expected one of: \ 21 + %s." 22 + s 23 + (String.concat ", " valid_enctypes)) 24 + 25 + let is_valid s = Result.is_ok (validate s) 26 + end 27 + 28 + let datatypes = [ (module Form_enctype : Datatype.S) ]
+22
lib/html5_checker/datatype/dt_form_enctype.mli
··· 1 + (** Form encoding type attribute datatype validator. 2 + 3 + This module provides a validator for the enctype and formenctype attributes 4 + used on form and input/button elements, as defined by the HTML5 specification. *) 5 + 6 + (** Form encoding type attribute validator. 7 + 8 + Validates form enctype/formenctype attribute values which can be: 9 + - application/x-www-form-urlencoded - Default encoding (form fields as name=value pairs) 10 + - multipart/form-data - Multipart encoding (required for file uploads) 11 + - text/plain - Plain text encoding (mostly for debugging) 12 + 13 + Values are matched case-insensitively according to HTML5 spec. 14 + 15 + Examples: 16 + - "application/x-www-form-urlencoded" 17 + - "multipart/form-data" 18 + - "text/plain" *) 19 + module Form_enctype : Datatype.S 20 + 21 + (** List of all datatypes defined in this module *) 22 + val datatypes : Datatype.t list
+22
lib/html5_checker/datatype/dt_form_method.ml
··· 1 + (** Form method attribute validation based on HTML5 spec *) 2 + 3 + (** Valid form method values *) 4 + let valid_methods = [ "get"; "post"; "dialog" ] 5 + 6 + module Form_method = struct 7 + let name = "form-method" 8 + 9 + let validate s = 10 + let s_lower = Datatype.string_to_ascii_lowercase s in 11 + if List.mem s_lower valid_methods then Ok () 12 + else 13 + Error 14 + (Printf.sprintf 15 + "The value '%s' is not a valid form method. Expected one of: %s." 16 + s 17 + (String.concat ", " valid_methods)) 18 + 19 + let is_valid s = Result.is_ok (validate s) 20 + end 21 + 22 + let datatypes = [ (module Form_method : Datatype.S) ]
+22
lib/html5_checker/datatype/dt_form_method.mli
··· 1 + (** Form method attribute datatype validator. 2 + 3 + This module provides a validator for the method attribute used on 4 + form elements, as defined by the HTML5 specification. *) 5 + 6 + (** Form method attribute validator. 7 + 8 + Validates form method attribute values which can be: 9 + - get - GET method (form data submitted in URL query string) 10 + - post - POST method (form data submitted in request body) 11 + - dialog - Dialog method (closes the dialog containing the form) 12 + 13 + Values are matched case-insensitively according to HTML5 spec. 14 + 15 + Examples: 16 + - "get" 17 + - "post" 18 + - "dialog" *) 19 + module Form_method : Datatype.S 20 + 21 + (** List of all datatypes defined in this module *) 22 + val datatypes : Datatype.t list
+29
lib/html5_checker/datatype/dt_hash.ml
··· 1 + (** Hash-name and fragment identifier datatype validators for HTML5. *) 2 + 3 + module Hash_name : Datatype.S = struct 4 + let name = "hash-name reference" 5 + 6 + let validate s = 7 + let len = String.length s in 8 + if len = 0 then Error "The empty string is not a valid hash-name reference." 9 + else if s.[0] <> '#' then 10 + Error "A hash-name reference must start with \"#\"." 11 + else if len = 1 then 12 + Error "A hash-name reference must have at least one character after \"#\"." 13 + else Ok () 14 + 15 + let is_valid s = Result.is_ok (validate s) 16 + end 17 + 18 + module Hash_or_empty : Datatype.S = struct 19 + let name = "hash-name reference (potentially empty)" 20 + 21 + let validate s = 22 + if String.length s = 0 then Ok () 23 + else Hash_name.validate s 24 + 25 + let is_valid s = Result.is_ok (validate s) 26 + end 27 + 28 + let datatypes = 29 + [ (module Hash_name : Datatype.S); (module Hash_or_empty : Datatype.S) ]
+24
lib/html5_checker/datatype/dt_hash.mli
··· 1 + (** Hash-name and fragment identifier datatype validators for HTML5. 2 + 3 + This module provides validators for fragment identifiers (hash-name 4 + references) used in URLs to reference specific parts of a document. *) 5 + 6 + (** Hash-name reference validator. 7 + 8 + A hash-name reference is a fragment identifier that starts with '#' 9 + followed by one or more characters. 10 + 11 + Requirements: 12 + - Must not be empty 13 + - Must start with '#' 14 + - Must have at least one character after '#' *) 15 + module Hash_name : Datatype.S 16 + 17 + (** Hash-name or empty validator. 18 + 19 + Same as Hash_name but allows empty strings. This is used for attributes 20 + where the hash-name reference is optional. *) 21 + module Hash_or_empty : Datatype.S 22 + 23 + (** List of all hash-related datatypes for registration. *) 24 + val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_hidden.ml
··· 1 + (** Hidden attribute validation for HTML5 *) 2 + 3 + module Hidden = struct 4 + let name = "hidden" 5 + 6 + let validate s = 7 + let s_lower = Datatype.string_to_ascii_lowercase s in 8 + match s_lower with 9 + | "" | "hidden" | "until-found" -> Ok () 10 + | _ -> 11 + Error 12 + (Printf.sprintf 13 + "The value '%s' is not a valid hidden value. Expected 'hidden', \ 14 + 'until-found', or empty string." 15 + s) 16 + 17 + let is_valid s = Result.is_ok (validate s) 18 + end 19 + 20 + let datatypes = [ (module Hidden : Datatype.S) ]
+23
lib/html5_checker/datatype/dt_hidden.mli
··· 1 + (** Hidden attribute datatype validator for HTML5. 2 + 3 + This module provides a validator for the hidden attribute, as defined by 4 + the HTML5 specification. *) 5 + 6 + (** Hidden attribute validator. 7 + 8 + Validates hidden attribute values which can be: 9 + - "" (empty string) - the element is hidden 10 + - "hidden" - the element is hidden 11 + - "until-found" - the element is hidden until found by a find-in-page or 12 + fragment navigation 13 + 14 + Values are case-insensitive. 15 + 16 + Examples: 17 + - "" 18 + - "hidden" 19 + - "until-found" *) 20 + module Hidden : Datatype.S 21 + 22 + (** List of all datatypes defined in this module *) 23 + val datatypes : Datatype.t list
+54
lib/html5_checker/datatype/dt_id.ml
··· 1 + (** ID-related datatype validators for HTML5. *) 2 + 3 + module Id : Datatype.S = struct 4 + let name = "id" 5 + 6 + let validate s = 7 + let len = String.length s in 8 + if len = 0 then Error "An ID must not be the empty string." 9 + else 10 + match String.index_opt s ' ' with 11 + | Some _ -> Error "An ID must not contain whitespace." 12 + | None -> ( 13 + (* Check for other whitespace characters *) 14 + let rec check_whitespace i = 15 + if i >= len then Ok () 16 + else if Datatype.is_whitespace s.[i] then 17 + Error "An ID must not contain whitespace." 18 + else check_whitespace (i + 1) 19 + in 20 + check_whitespace 0) 21 + 22 + let is_valid s = Result.is_ok (validate s) 23 + end 24 + 25 + module Idref : Datatype.S = struct 26 + let name = "id reference" 27 + 28 + (* An IDREF has the same validation rules as an ID *) 29 + let validate = Id.validate 30 + let is_valid s = Result.is_ok (validate s) 31 + end 32 + 33 + module Idrefs : Datatype.S = struct 34 + let name = "id references" 35 + 36 + let validate s = 37 + (* IDREFS must contain at least one non-whitespace character *) 38 + let len = String.length s in 39 + let rec check_non_whitespace i = 40 + if i >= len then 41 + Error "An IDREFS value must contain at least one non-whitespace character." 42 + else if not (Datatype.is_whitespace s.[i]) then Ok () 43 + else check_non_whitespace (i + 1) 44 + in 45 + check_non_whitespace 0 46 + 47 + let is_valid s = Result.is_ok (validate s) 48 + end 49 + 50 + let datatypes = 51 + [ (module Id : Datatype.S) 52 + ; (module Idref : Datatype.S) 53 + ; (module Idrefs : Datatype.S) 54 + ]
+35
lib/html5_checker/datatype/dt_id.mli
··· 1 + (** ID-related datatype validators for HTML5. 2 + 3 + This module provides validators for HTML5 ID attributes and ID references 4 + based on the Nu HTML Checker's implementation. *) 5 + 6 + (** ID validator. 7 + 8 + Accepts any string that consists of one or more characters and does not 9 + contain any whitespace characters. 10 + 11 + An ID must be: 12 + - Non-empty 13 + - Contain no whitespace characters (space, tab, LF, FF, CR) *) 14 + module Id : Datatype.S 15 + 16 + (** ID reference validator. 17 + 18 + An IDREF has the same validation rules as an ID - it must be non-empty 19 + and contain no whitespace. The semantic difference is that an IDREF 20 + references an existing ID rather than defining one. *) 21 + module Idref : Datatype.S 22 + 23 + (** ID references validator. 24 + 25 + Accepts a space-separated list of ID references. The value must contain 26 + at least one non-whitespace character. 27 + 28 + IDREFS values: 29 + - Must not be empty or contain only whitespace 30 + - Can contain multiple space-separated ID references 31 + - Each individual reference follows IDREF rules *) 32 + module Idrefs : Datatype.S 33 + 34 + (** List of all ID-related datatypes for registration. *) 35 + val datatypes : Datatype.t list
+46
lib/html5_checker/datatype/dt_input_type.ml
··· 1 + (** Input type attribute validation based on HTML5 spec *) 2 + 3 + (** Valid input type values *) 4 + let valid_types = 5 + [ 6 + "hidden"; 7 + "text"; 8 + "search"; 9 + "tel"; 10 + "url"; 11 + "email"; 12 + "password"; 13 + "date"; 14 + "month"; 15 + "week"; 16 + "time"; 17 + "datetime-local"; 18 + "number"; 19 + "range"; 20 + "color"; 21 + "checkbox"; 22 + "radio"; 23 + "file"; 24 + "submit"; 25 + "image"; 26 + "reset"; 27 + "button"; 28 + ] 29 + 30 + module Input_type = struct 31 + let name = "input-type" 32 + 33 + let validate s = 34 + let s_lower = Datatype.string_to_ascii_lowercase s in 35 + if List.mem s_lower valid_types then Ok () 36 + else 37 + Error 38 + (Printf.sprintf 39 + "The value '%s' is not a valid input type. Expected one of: %s." 40 + s 41 + (String.concat ", " valid_types)) 42 + 43 + let is_valid s = Result.is_ok (validate s) 44 + end 45 + 46 + let datatypes = [ (module Input_type : Datatype.S) ]
+42
lib/html5_checker/datatype/dt_input_type.mli
··· 1 + (** Input type attribute datatype validator. 2 + 3 + This module provides a validator for the type attribute used on 4 + input elements, as defined by the HTML5 specification. *) 5 + 6 + (** Input type attribute validator. 7 + 8 + Validates input type attribute values which can be: 9 + - hidden - Hidden input field 10 + - text - Single-line text field 11 + - search - Search input field 12 + - tel - Telephone number input field 13 + - url - URL input field 14 + - email - Email address input field 15 + - password - Password input field 16 + - date - Date input field (year, month, day) 17 + - month - Month input field (year, month) 18 + - week - Week input field (year, week) 19 + - time - Time input field (hour, minute, seconds, fractional seconds) 20 + - datetime-local - Local date and time input field 21 + - number - Numeric input field 22 + - range - Range control (slider) 23 + - color - Color picker 24 + - checkbox - Checkbox 25 + - radio - Radio button 26 + - file - File upload control 27 + - submit - Submit button 28 + - image - Image submit button 29 + - reset - Reset button 30 + - button - Push button 31 + 32 + Values are matched case-insensitively according to HTML5 spec. 33 + 34 + Examples: 35 + - "text" 36 + - "email" 37 + - "datetime-local" 38 + - "submit" *) 39 + module Input_type : Datatype.S 40 + 41 + (** List of all datatypes defined in this module *) 42 + val datatypes : Datatype.t list
+23
lib/html5_checker/datatype/dt_inputmode.ml
··· 1 + (** Input mode attribute validation for HTML5 *) 2 + 3 + module Inputmode = struct 4 + let name = "inputmode" 5 + 6 + let validate s = 7 + let s_lower = Datatype.string_to_ascii_lowercase s in 8 + match s_lower with 9 + | "" | "none" | "text" | "decimal" | "numeric" | "tel" | "search" 10 + | "email" | "url" -> 11 + Ok () 12 + | _ -> 13 + Error 14 + (Printf.sprintf 15 + "The value '%s' is not a valid inputmode value. Expected one \ 16 + of: empty string, 'none', 'text', 'decimal', 'numeric', 'tel', \ 17 + 'search', 'email', or 'url'." 18 + s) 19 + 20 + let is_valid s = Result.is_ok (validate s) 21 + end 22 + 23 + let datatypes = [ (module Inputmode : Datatype.S) ]
+29
lib/html5_checker/datatype/dt_inputmode.mli
··· 1 + (** Input mode attribute datatype validator. 2 + 3 + This module provides a validator for the inputmode attribute used to hint 4 + at the type of data the user might enter, as defined by the HTML5 spec. *) 5 + 6 + (** Inputmode attribute validator. 7 + 8 + Validates inputmode attribute values which can be: 9 + - "" (empty string, no specific input mode) 10 + - "none" (no virtual keyboard) 11 + - "text" (standard text input) 12 + - "decimal" (decimal numeric input with locale-appropriate format) 13 + - "numeric" (numeric input) 14 + - "tel" (telephone number input) 15 + - "search" (search input) 16 + - "email" (email address input) 17 + - "url" (URL input) 18 + 19 + Values are case-insensitive after ASCII lowercasing. 20 + 21 + Examples: 22 + - "" 23 + - "numeric" 24 + - "email" 25 + - "tel" *) 26 + module Inputmode : Datatype.S 27 + 28 + (** List of all datatypes defined in this module *) 29 + val datatypes : Datatype.t list
+71
lib/html5_checker/datatype/dt_integer.ml
··· 1 + (** Valid HTML5 integer (optional sign followed by digits) *) 2 + module Integer = struct 3 + let name = "integer" 4 + 5 + let validate s = 6 + let len = String.length s in 7 + if len = 0 then Error "The empty string is not a valid integer." 8 + else 9 + let start_pos = 10 + if s.[0] = '-' then 11 + if len = 1 then failwith "unreachable" 12 + else 1 13 + else 0 14 + in 15 + (* First character must be minus or digit *) 16 + if start_pos = 0 && not (Datatype.is_ascii_digit s.[0]) then 17 + Error 18 + (Printf.sprintf "Expected a minus sign or a digit but saw '%c' instead." 19 + s.[0]) 20 + else 21 + (* Rest must be digits *) 22 + let rec check_digits i = 23 + if i >= len then Ok () 24 + else if Datatype.is_ascii_digit s.[i] then check_digits (i + 1) 25 + else 26 + Error (Printf.sprintf "Expected a digit but saw '%c' instead." s.[i]) 27 + in 28 + check_digits start_pos 29 + 30 + let is_valid s = Result.is_ok (validate s) 31 + end 32 + 33 + (** Non-negative integer (>= 0) *) 34 + module Integer_non_negative = struct 35 + let name = "non-negative integer" 36 + 37 + let validate s = 38 + let len = String.length s in 39 + if len = 0 then Error "The empty string is not a valid non-negative integer." 40 + else 41 + (* All characters must be digits *) 42 + let rec check_digits i = 43 + if i >= len then Ok () 44 + else if Datatype.is_ascii_digit s.[i] then check_digits (i + 1) 45 + else Error (Printf.sprintf "Expected a digit but saw '%c' instead." s.[i]) 46 + in 47 + check_digits 0 48 + 49 + let is_valid s = Result.is_ok (validate s) 50 + end 51 + 52 + (** Positive integer (> 0) *) 53 + module Integer_positive = struct 54 + let name = "positive integer" 55 + 56 + let validate s = 57 + let len = String.length s in 58 + if len = 0 then Error "The empty string is not a valid positive integer." 59 + else 60 + (* All characters must be digits *) 61 + let rec check_digits i all_zeros = 62 + if i >= len then 63 + if all_zeros then Error "Zero is not a positive integer." else Ok () 64 + else if Datatype.is_ascii_digit s.[i] then 65 + check_digits (i + 1) (all_zeros && s.[i] = '0') 66 + else Error (Printf.sprintf "Expected a digit but saw '%c' instead." s.[i]) 67 + in 68 + check_digits 0 true 69 + 70 + let is_valid s = Result.is_ok (validate s) 71 + end
+16
lib/html5_checker/datatype/dt_integer.mli
··· 1 + (** Integer datatype validators for HTML5 *) 2 + 3 + (** Valid HTML5 integer (optional sign followed by digits) *) 4 + module Integer : sig 5 + include Datatype.S 6 + end 7 + 8 + (** Non-negative integer (>= 0) *) 9 + module Integer_non_negative : sig 10 + include Datatype.S 11 + end 12 + 13 + (** Positive integer (> 0) *) 14 + module Integer_positive : sig 15 + include Datatype.S 16 + end
+103
lib/html5_checker/datatype/dt_integrity.ml
··· 1 + (** Subresource integrity attribute validation *) 2 + 3 + (** Valid hash algorithms *) 4 + let valid_algorithms = [ "sha256"; "sha384"; "sha512" ] 5 + 6 + (** Check if character is valid base64 character *) 7 + let is_base64_char c = 8 + (c >= 'A' && c <= 'Z') 9 + || (c >= 'a' && c <= 'z') 10 + || (c >= '0' && c <= '9') 11 + || c = '+' || c = '/' || c = '=' 12 + 13 + (** Validate base64 encoding *) 14 + let validate_base64 s = 15 + if String.length s = 0 then false 16 + else 17 + (* Check all characters are valid base64 *) 18 + let all_valid = ref true in 19 + for i = 0 to String.length s - 1 do 20 + if not (is_base64_char s.[i]) then all_valid := false 21 + done; 22 + if not !all_valid then false 23 + else 24 + (* Check padding is at the end only *) 25 + let has_padding = String.contains s '=' in 26 + if not has_padding then true 27 + else 28 + (* Find first '=' *) 29 + let first_eq = String.index s '=' in 30 + (* All chars after first '=' must be '=' *) 31 + let valid_padding = ref true in 32 + for i = first_eq to String.length s - 1 do 33 + if s.[i] <> '=' then valid_padding := false 34 + done; 35 + !valid_padding 36 + && (* At most 2 padding characters *) 37 + String.length s - first_eq <= 2 38 + 39 + (** Validate a single hash value *) 40 + let validate_hash_value s = 41 + let trimmed = Datatype.trim_html_spaces s in 42 + if trimmed = "" then Error "Hash value must not be empty" 43 + else 44 + (* Split on '-' to get algorithm and hash *) 45 + match String.index_opt trimmed '-' with 46 + | None -> 47 + Error 48 + (Printf.sprintf 49 + "Hash value '%s' must be in format 'algorithm-base64hash'" trimmed) 50 + | Some dash_pos -> 51 + let algorithm = String.sub trimmed 0 dash_pos in 52 + let algorithm_lower = Datatype.string_to_ascii_lowercase algorithm in 53 + if not (List.mem algorithm_lower valid_algorithms) then 54 + Error 55 + (Printf.sprintf 56 + "Hash algorithm '%s' is not supported. Must be one of: %s" 57 + algorithm (String.concat ", " valid_algorithms)) 58 + else 59 + let rest = String.sub trimmed (dash_pos + 1) (String.length trimmed - dash_pos - 1) in 60 + (* Split on '?' to separate hash from options *) 61 + let hash_part = 62 + match String.index_opt rest '?' with 63 + | None -> rest 64 + | Some q_pos -> String.sub rest 0 q_pos 65 + in 66 + if String.length hash_part = 0 then 67 + Error "Hash value after algorithm must not be empty" 68 + else if not (validate_base64 hash_part) then 69 + Error 70 + (Printf.sprintf 71 + "Hash value '%s' is not valid base64 encoding" hash_part) 72 + else Ok () 73 + 74 + (** Validate integrity attribute value *) 75 + let validate_integrity s = 76 + let trimmed = Datatype.trim_html_spaces s in 77 + if trimmed = "" then Error "Integrity attribute must not be empty" 78 + else 79 + (* Split on whitespace *) 80 + let hash_values = String.split_on_char ' ' trimmed in 81 + let hash_values = 82 + List.filter (fun h -> Datatype.trim_html_spaces h <> "") hash_values 83 + in 84 + if hash_values = [] then 85 + Error "Integrity attribute must contain at least one hash value" 86 + else 87 + (* Validate each hash value *) 88 + let rec check_hashes = function 89 + | [] -> Ok () 90 + | h :: rest -> ( 91 + match validate_hash_value h with 92 + | Error e -> Error e 93 + | Ok () -> check_hashes rest) 94 + in 95 + check_hashes hash_values 96 + 97 + module Integrity = struct 98 + let name = "integrity" 99 + let validate = validate_integrity 100 + let is_valid s = Result.is_ok (validate s) 101 + end 102 + 103 + let datatypes = [ (module Integrity : Datatype.S) ]
+27
lib/html5_checker/datatype/dt_integrity.mli
··· 1 + (** Subresource integrity attribute validator. 2 + 3 + This module provides a validator for the integrity attribute used on 4 + script and link elements for subresource integrity checks, as defined 5 + by the W3C Subresource Integrity specification. *) 6 + 7 + (** Integrity attribute validator. 8 + 9 + Validates integrity attribute values which contain space-separated hash 10 + values. Each hash value consists of: 11 + - An algorithm identifier (sha256, sha384, or sha512) 12 + - A hyphen (-) 13 + - The base64-encoded hash value 14 + - Optional options preceded by '?' 15 + 16 + Examples: 17 + - "sha256-abc123..." 18 + - "sha384-xyz789..." 19 + - "sha256-abc123... sha512-def456..." 20 + - "sha256-abc123...?ct=application/javascript" 21 + 22 + The base64 encoding must be valid and the algorithm must be one of the 23 + supported hash functions. *) 24 + module Integrity : Datatype.S 25 + 26 + (** List of all datatypes defined in this module *) 27 + val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_kind.ml
··· 1 + (** Kind attribute validation for HTML5 *) 2 + 3 + module Kind = struct 4 + let name = "kind" 5 + 6 + let validate s = 7 + let s_lower = Datatype.string_to_ascii_lowercase s in 8 + match s_lower with 9 + | "subtitles" | "captions" | "descriptions" | "chapters" | "metadata" -> Ok () 10 + | _ -> 11 + Error 12 + (Printf.sprintf 13 + "The value '%s' is not a valid kind. Expected 'subtitles', \ 14 + 'captions', 'descriptions', 'chapters', or 'metadata'." 15 + s) 16 + 17 + let is_valid s = Result.is_ok (validate s) 18 + end 19 + 20 + let datatypes = [ (module Kind : Datatype.S) ]
+29
lib/html5_checker/datatype/dt_kind.mli
··· 1 + (** Kind attribute datatype validator for HTML5. 2 + 3 + This module provides a validator for the kind attribute used on track 4 + elements, as defined by the HTML5 specification. *) 5 + 6 + (** Kind attribute validator. 7 + 8 + Validates kind attribute values which can be: 9 + - "subtitles" - Transcription or translation of the dialogue, suitable for 10 + when the sound is available but not understood 11 + - "captions" - Transcription or translation of the dialogue, sound effects, 12 + relevant musical cues, and other relevant audio information, suitable for 13 + when sound is unavailable or not clearly audible 14 + - "descriptions" - Textual descriptions of the video component, suitable for 15 + audio synthesis when the visual component is unavailable 16 + - "chapters" - Chapter titles, for use in navigating the media resource 17 + - "metadata" - Tracks intended for use from script, not displayed by the user 18 + agent 19 + 20 + Values are case-insensitive. 21 + 22 + Examples: 23 + - "subtitles" 24 + - "captions" 25 + - "descriptions" *) 26 + module Kind : Datatype.S 27 + 28 + (** List of all datatypes defined in this module *) 29 + val datatypes : Datatype.t list
+110
lib/html5_checker/datatype/dt_language.ml
··· 1 + (** Helper functions for language tag validation *) 2 + 3 + let is_lower_alpha c = c >= 'a' && c <= 'z' 4 + let is_upper_alpha c = c >= 'A' && c <= 'Z' 5 + let is_alpha c = is_lower_alpha c || is_upper_alpha c 6 + let is_digit c = c >= '0' && c <= '9' 7 + let is_alphanumeric c = is_alpha c || is_digit c 8 + 9 + let is_all_alpha s = 10 + String.for_all is_alpha s 11 + 12 + let _is_all_digits s = 13 + String.for_all is_digit s 14 + 15 + let is_all_alphanumeric s = 16 + String.for_all is_alphanumeric s 17 + 18 + let to_lower s = 19 + String.lowercase_ascii s 20 + 21 + (** Validate language tag structure according to BCP 47. 22 + This is a simplified validator that checks structural validity 23 + but does not validate against the IANA registry. *) 24 + let validate_language_structure s = 25 + if String.length s = 0 then 26 + Error "The empty string is not a valid language tag" 27 + else if String.starts_with ~prefix:"-" s then 28 + Error "Language tag must not start with HYPHEN-MINUS" 29 + else if String.ends_with ~suffix:"-" s then 30 + Error "Language tag must not end with HYPHEN-MINUS" 31 + else 32 + let subtags = String.split_on_char '-' s in 33 + 34 + (* Check for empty subtags and length constraints *) 35 + let rec check_subtag_constraints = function 36 + | [] -> Ok () 37 + | subtag :: rest -> 38 + let len = String.length subtag in 39 + if len = 0 then 40 + Error "Zero-length subtag" 41 + else if len > 8 then 42 + Error "Subtags must not exceed 8 characters in length" 43 + else 44 + check_subtag_constraints rest 45 + in 46 + 47 + match check_subtag_constraints subtags with 48 + | Error e -> Error e 49 + | Ok () -> 50 + (* Primary language subtag validation *) 51 + match subtags with 52 + | [] -> Error "Language tag must have at least one subtag" 53 + | first :: rest -> 54 + let first_lower = to_lower first in 55 + let len = String.length first_lower in 56 + 57 + (* Check for private use tag *) 58 + if first_lower = "x" then 59 + if rest = [] then 60 + Error "No subtags in private use sequence" 61 + else 62 + (* Private use subtags must be 1-8 alphanumeric *) 63 + let rec check_private_use = function 64 + | [] -> Ok () 65 + | subtag :: rest -> 66 + let subtag_lower = to_lower subtag in 67 + if String.length subtag_lower < 1 then 68 + Error "Private use subtag is too short" 69 + else if not (is_all_alphanumeric subtag_lower) then 70 + Error "Bad character in private use subtag" 71 + else 72 + check_private_use rest 73 + in 74 + check_private_use rest 75 + (* Primary language: 2-3 letters (ISO 639) *) 76 + else if (len = 2 || len = 3) && is_all_alpha first_lower then 77 + Ok () 78 + (* Reserved: 4 letters *) 79 + else if len = 4 && is_all_alpha first_lower then 80 + Error "Found reserved language tag" 81 + (* Registered: 5+ letters *) 82 + else if len >= 5 && is_all_alpha first_lower then 83 + Ok () 84 + else 85 + Error "Invalid language subtag format" 86 + 87 + module Language = struct 88 + let name = "language tag" 89 + 90 + let validate s = validate_language_structure s 91 + 92 + let is_valid s = Result.is_ok (validate s) 93 + end 94 + 95 + module Language_or_empty = struct 96 + let name = "language tag or empty" 97 + 98 + let validate s = 99 + if String.length s = 0 then 100 + Ok () 101 + else 102 + validate_language_structure s 103 + 104 + let is_valid s = Result.is_ok (validate s) 105 + end 106 + 107 + let datatypes = [ 108 + (module Language : Datatype.S); 109 + (module Language_or_empty : Datatype.S); 110 + ]
+43
lib/html5_checker/datatype/dt_language.mli
··· 1 + (** Language tag datatype validators for HTML5. 2 + 3 + This module provides validators for BCP 47 language tags as used in HTML5. 4 + Language tags identify natural languages and consist of subtags separated 5 + by hyphens, following the IETF BCP 47 standard. *) 6 + 7 + (** Language tag datatype (BCP 47 format). 8 + 9 + Validates a language tag according to BCP 47. A language tag consists of: 10 + - Primary language subtag (2-3 letters, or 5+ letters for registered languages) 11 + - Optional extended language subtag (3 letters) 12 + - Optional script subtag (4 letters) 13 + - Optional region subtag (2 letters or 3 digits) 14 + - Optional variant subtags (5-8 alphanumeric characters, or 4 starting with digit) 15 + - Optional extension subtags (single letter + subtags) 16 + - Optional private use subtags (starting with 'x-') 17 + 18 + Examples: 19 + - "en" (English) 20 + - "en-US" (US English) 21 + - "zh-Hans" (Simplified Chinese) 22 + - "zh-Hans-CN" (Simplified Chinese as used in China) 23 + 24 + The validator performs basic structural validation: 25 + - Tag cannot be empty 26 + - Tag cannot start or end with hyphen 27 + - Subtags cannot be empty 28 + - Subtags cannot exceed 8 characters (except for registered values) 29 + - Primary language subtag must be 2-3 letters (ISO 639) or 5+ letters (registered) 30 + - 4-letter primary subtags are reserved 31 + 32 + Note: This implementation does NOT validate against the IANA language 33 + subtag registry. It only validates the structural format. *) 34 + module Language : Datatype.S 35 + 36 + (** Language tag or empty string. 37 + 38 + Like Language but also accepts the empty string. This is used for cases 39 + where lang="" is valid to indicate an unknown or unspecified language. *) 40 + module Language_or_empty : Datatype.S 41 + 42 + (** List of all language datatypes *) 43 + val datatypes : Datatype.t list
+42
lib/html5_checker/datatype/dt_list_type.ml
··· 1 + (** List type attribute validation based on HTML5 spec *) 2 + 3 + (** Valid ol type values (case-sensitive) *) 4 + let valid_ol_types = [ "1"; "a"; "A"; "i"; "I" ] 5 + 6 + (** Valid ul type values (deprecated but still parsed) *) 7 + let valid_ul_types = [ "disc"; "circle"; "square" ] 8 + 9 + module Ol_type = struct 10 + let name = "ol-type" 11 + 12 + let validate s = 13 + (* Note: ol type is case-sensitive *) 14 + if List.mem s valid_ol_types then Ok () 15 + else 16 + Error 17 + (Printf.sprintf 18 + "The value '%s' is not a valid ol type. Expected one of: %s." 19 + s 20 + (String.concat ", " valid_ol_types)) 21 + 22 + let is_valid s = Result.is_ok (validate s) 23 + end 24 + 25 + module Ul_type = struct 26 + let name = "ul-type" 27 + 28 + let validate s = 29 + let s_lower = Datatype.string_to_ascii_lowercase s in 30 + if List.mem s_lower valid_ul_types then Ok () 31 + else 32 + Error 33 + (Printf.sprintf 34 + "The value '%s' is not a valid ul type. Expected one of: %s." 35 + s 36 + (String.concat ", " valid_ul_types)) 37 + 38 + let is_valid s = Result.is_ok (validate s) 39 + end 40 + 41 + let datatypes = 42 + [ (module Ol_type : Datatype.S); (module Ul_type : Datatype.S) ]
+44
lib/html5_checker/datatype/dt_list_type.mli
··· 1 + (** List type attribute datatype validators. 2 + 3 + This module provides validators for the type attribute used on 4 + ol and ul elements, as defined by the HTML5 specification. *) 5 + 6 + (** Ordered list type attribute validator. 7 + 8 + Validates ol type attribute values which can be: 9 + - 1 - Decimal numbers (1, 2, 3, ...) 10 + - a - Lowercase Latin letters (a, b, c, ...) 11 + - A - Uppercase Latin letters (A, B, C, ...) 12 + - i - Lowercase Roman numerals (i, ii, iii, ...) 13 + - I - Uppercase Roman numerals (I, II, III, ...) 14 + 15 + Values are matched case-sensitively for ol type. 16 + 17 + Examples: 18 + - "1" 19 + - "a" 20 + - "A" 21 + - "i" 22 + - "I" *) 23 + module Ol_type : Datatype.S 24 + 25 + (** Unordered list type attribute validator. 26 + 27 + Validates ul type attribute values which can be: 28 + - disc - Filled circle (default) 29 + - circle - Hollow circle 30 + - square - Filled square 31 + 32 + Note: The type attribute on ul is deprecated in HTML5 but may still 33 + be parsed for backwards compatibility. 34 + 35 + Values are matched case-insensitively according to HTML5 spec. 36 + 37 + Examples: 38 + - "disc" 39 + - "circle" 40 + - "square" *) 41 + module Ul_type : Datatype.S 42 + 43 + (** List of all datatypes defined in this module *) 44 + val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_loading.ml
··· 1 + (** Lazy loading attribute validation for HTML5 *) 2 + 3 + module Loading = struct 4 + let name = "loading" 5 + 6 + let validate s = 7 + let s_lower = Datatype.string_to_ascii_lowercase s in 8 + match s_lower with 9 + | "" | "lazy" | "eager" -> Ok () 10 + | _ -> 11 + Error 12 + (Printf.sprintf 13 + "The value '%s' is not a valid loading value. Expected empty \ 14 + string, 'lazy', or 'eager'." 15 + s) 16 + 17 + let is_valid s = Result.is_ok (validate s) 18 + end 19 + 20 + let datatypes = [ (module Loading : Datatype.S) ]
+22
lib/html5_checker/datatype/dt_loading.mli
··· 1 + (** Lazy loading attribute datatype validator. 2 + 3 + This module provides a validator for the loading attribute used to control 4 + lazy loading behavior for images and iframes, as defined by the HTML5 spec. *) 5 + 6 + (** Loading attribute validator. 7 + 8 + Validates loading attribute values which can be: 9 + - "" (empty string, default loading behavior) 10 + - "lazy" (defer loading until needed) 11 + - "eager" (load immediately) 12 + 13 + Values are case-insensitive after ASCII lowercasing. 14 + 15 + Examples: 16 + - "" 17 + - "lazy" 18 + - "eager" *) 19 + module Loading : Datatype.S 20 + 21 + (** List of all datatypes defined in this module *) 22 + val datatypes : Datatype.t list
+144
lib/html5_checker/datatype/dt_media_query.ml
··· 1 + (** Media query validation - simplified implementation *) 2 + 3 + (** Media types *) 4 + let media_types = 5 + [ 6 + "all"; 7 + "screen"; 8 + "print"; 9 + "speech"; 10 + "aural"; 11 + "braille"; 12 + "handheld"; 13 + "projection"; 14 + "tty"; 15 + "tv"; 16 + "embossed"; 17 + ] 18 + 19 + (** Media query keywords *) 20 + let media_keywords = [ "and"; "or"; "not"; "only" ] 21 + 22 + (** Check if character is whitespace *) 23 + let is_whitespace c = c = ' ' || c = '\t' || c = '\n' || c = '\r' 24 + 25 + (** Check if character can start an identifier *) 26 + let is_ident_start c = 27 + (c >= 'a' && c <= 'z') 28 + || (c >= 'A' && c <= 'Z') 29 + || c = '_' || c = '-' || Char.code c >= 128 30 + 31 + (** Check if character can be in an identifier *) 32 + let is_ident_char c = 33 + is_ident_start c || (c >= '0' && c <= '9') 34 + 35 + (** Check balanced parentheses *) 36 + let check_balanced_parens s = 37 + let rec check depth i = 38 + if i >= String.length s then 39 + if depth = 0 then Ok () 40 + else Error "Unbalanced parentheses: unclosed '('" 41 + else 42 + let c = s.[i] in 43 + match c with 44 + | '(' -> check (depth + 1) (i + 1) 45 + | ')' -> 46 + if depth = 0 then Error "Unbalanced parentheses: unexpected ')'" 47 + else check (depth - 1) (i + 1) 48 + | _ -> check depth (i + 1) 49 + in 50 + check 0 0 51 + 52 + (** Extract words (identifiers and keywords) from media query *) 53 + let extract_words s = 54 + let words = ref [] in 55 + let buf = Buffer.create 16 in 56 + let in_parens = ref 0 in 57 + 58 + for i = 0 to String.length s - 1 do 59 + let c = s.[i] in 60 + match c with 61 + | '(' -> 62 + if Buffer.length buf > 0 then ( 63 + words := Buffer.contents buf :: !words; 64 + Buffer.clear buf); 65 + incr in_parens 66 + | ')' -> 67 + if Buffer.length buf > 0 then ( 68 + words := Buffer.contents buf :: !words; 69 + Buffer.clear buf); 70 + decr in_parens 71 + | _ -> 72 + if !in_parens = 0 then 73 + if is_ident_char c then Buffer.add_char buf c 74 + else if is_whitespace c then 75 + if Buffer.length buf > 0 then ( 76 + words := Buffer.contents buf :: !words; 77 + Buffer.clear buf) 78 + else () 79 + else if Buffer.length buf > 0 then ( 80 + words := Buffer.contents buf :: !words; 81 + Buffer.clear buf) 82 + done; 83 + 84 + if Buffer.length buf > 0 then words := Buffer.contents buf :: !words; 85 + List.rev !words 86 + 87 + (** Validate media query structure *) 88 + let validate_media_query s = 89 + let s = String.trim s in 90 + if String.length s = 0 then Error "Media query must not be empty" 91 + else 92 + (* Check balanced parentheses *) 93 + match check_balanced_parens s with 94 + | Error _ as e -> e 95 + | Ok () -> 96 + (* Extract and validate words *) 97 + let words = extract_words s in 98 + let words_lower = List.map String.lowercase_ascii words in 99 + 100 + (* Basic validation: check for invalid keyword combinations *) 101 + let rec validate_words prev = function 102 + | [] -> Ok () 103 + | word :: rest -> ( 104 + let word_lower = String.lowercase_ascii word in 105 + match (prev, word_lower) with 106 + | None, "and" | None, "or" -> 107 + Error 108 + (Printf.sprintf 109 + "Media query cannot start with keyword '%s'" word) 110 + | Some "and", "and" | Some "or", "or" | Some "not", "not" -> 111 + Error 112 + (Printf.sprintf "Consecutive '%s' keywords are not allowed" 113 + word) 114 + | Some "only", "only" -> 115 + Error "Consecutive 'only' keywords are not allowed" 116 + | _, _ -> validate_words (Some word_lower) rest) 117 + in 118 + 119 + (* Check if query contains valid media types or features *) 120 + let has_media_type = 121 + List.exists 122 + (fun w -> List.mem (String.lowercase_ascii w) media_types) 123 + words 124 + in 125 + let has_features = String.contains s '(' in 126 + 127 + if not (has_media_type || has_features) then 128 + (* Only keywords, no actual media type or features *) 129 + if List.for_all (fun w -> List.mem w media_keywords) words_lower then 130 + Error "Media query contains only keywords without media type or features" 131 + else Ok () (* Assume other identifiers are valid *) 132 + else validate_words None words 133 + 134 + module Media_query = struct 135 + let name = "media query" 136 + let validate = validate_media_query 137 + 138 + let is_valid s = 139 + match validate s with 140 + | Ok () -> true 141 + | Error _ -> false 142 + end 143 + 144 + let datatypes = [ (module Media_query : Datatype.S) ]
+28
lib/html5_checker/datatype/dt_media_query.mli
··· 1 + (** Media query datatype validator. 2 + 3 + This module provides a validator for CSS media queries as used in HTML5. *) 4 + 5 + (** Media query validator. 6 + 7 + Validates CSS media queries used in media attributes and CSS @media rules. 8 + 9 + Examples: 10 + - "screen" 11 + - "print" 12 + - "(min-width: 600px)" 13 + - "screen and (color)" 14 + - "not screen and (color)" 15 + - "(min-width: 600px) and (max-width: 800px)" 16 + 17 + This is a simplified validator that checks: 18 + - Balanced parentheses 19 + - Basic media type keywords (all, screen, print, etc.) 20 + - Basic logical operators (and, or, not, only) 21 + - Valid feature queries in parentheses 22 + 23 + Note: This does not perform full CSS media query parsing. For production 24 + use, consider integrating with a full CSS parser. *) 25 + module Media_query : Datatype.S 26 + 27 + (** List of all datatypes defined in this module *) 28 + val datatypes : Datatype.t list
+210
lib/html5_checker/datatype/dt_mime.ml
··· 1 + (** MIME type validation based on RFC 2045 and HTML5 spec *) 2 + 3 + (** Check if character is whitespace *) 4 + let is_whitespace c = c = ' ' || c = '\t' || c = '\n' || c = '\r' 5 + 6 + (** Check if character is a token character (RFC 2045) *) 7 + let is_token_char c = 8 + (c >= '\033' && c <= '\126') 9 + && not 10 + (c = '(' || c = ')' || c = '<' || c = '>' || c = '@' || c = ',' 11 + || c = ';' || c = ':' || c = '\\' || c = '"' || c = '/' || c = '[' 12 + || c = ']' || c = '?' || c = '=' || c = '{' || c = '}') 13 + 14 + (** Check if character is valid in quoted string (qdtext) *) 15 + let is_qdtext_char c = 16 + (c >= ' ' && c <= '\126') || c = '\n' || c = '\r' || c = '\t' 17 + 18 + (** States for MIME type parser *) 19 + type parse_state = 20 + | At_start 21 + | In_supertype 22 + | At_subtype_start 23 + | In_subtype 24 + | Semicolon_seen 25 + | Ws_before_semicolon 26 + | In_param_name 27 + | Equals_seen 28 + | In_quoted_string 29 + | In_unquoted_string 30 + | In_quoted_pair 31 + | Close_quote_seen 32 + 33 + (** JavaScript MIME types that should not have parameters *) 34 + let javascript_mime_types = 35 + [ 36 + "application/ecmascript"; 37 + "application/javascript"; 38 + "application/x-ecmascript"; 39 + "application/x-javascript"; 40 + "text/ecmascript"; 41 + "text/javascript"; 42 + "text/javascript1.0"; 43 + "text/javascript1.1"; 44 + "text/javascript1.2"; 45 + "text/javascript1.3"; 46 + "text/javascript1.4"; 47 + "text/javascript1.5"; 48 + "text/jscript"; 49 + "text/livescript"; 50 + "text/x-ecmascript"; 51 + "text/x-javascript"; 52 + ] 53 + 54 + (** Validate a single MIME type *) 55 + let validate_mime_type s = 56 + let len = String.length s in 57 + let rec parse state i = 58 + if i >= len then 59 + (* End of string - check final state *) 60 + match state with 61 + | In_subtype | In_unquoted_string | Close_quote_seen -> Ok () 62 + | At_start -> Error "Expected a MIME type but saw the empty string" 63 + | In_supertype | At_subtype_start -> Error "Subtype missing" 64 + | Equals_seen | In_param_name -> Error "Parameter value missing" 65 + | In_quoted_pair | In_quoted_string -> Error "Unfinished quoted string" 66 + | Semicolon_seen -> 67 + Error "Semicolon seen but there was no parameter following it" 68 + | Ws_before_semicolon -> Error "Extraneous trailing whitespace" 69 + else 70 + let c = s.[i] in 71 + match state with 72 + | At_start -> 73 + if is_token_char c then parse In_supertype (i + 1) 74 + else 75 + Error 76 + (Printf.sprintf 77 + "Expected a token character but saw '%c' instead" c) 78 + | In_supertype -> 79 + if is_token_char c then parse In_supertype (i + 1) 80 + else if c = '/' then parse At_subtype_start (i + 1) 81 + else 82 + Error 83 + (Printf.sprintf 84 + "Expected a token character or '/' but saw '%c' instead" c) 85 + | At_subtype_start -> 86 + if is_token_char c then parse In_subtype (i + 1) 87 + else 88 + Error 89 + (Printf.sprintf 90 + "Expected a token character but saw '%c' instead" c) 91 + | In_subtype -> 92 + if is_token_char c then parse In_subtype (i + 1) 93 + else if c = ';' then 94 + (* Check if this is a JavaScript MIME type *) 95 + let mime_type = String.sub s 0 i |> String.lowercase_ascii in 96 + if List.mem mime_type javascript_mime_types then 97 + Error 98 + "A JavaScript MIME type must not contain any characters after \ 99 + the subtype" 100 + else parse Semicolon_seen (i + 1) 101 + else if is_whitespace c then parse Ws_before_semicolon (i + 1) 102 + else 103 + Error 104 + (Printf.sprintf 105 + "Expected a token character, whitespace or a semicolon but saw \ 106 + '%c' instead" 107 + c) 108 + | Ws_before_semicolon -> 109 + if is_whitespace c then parse Ws_before_semicolon (i + 1) 110 + else if c = ';' then parse Semicolon_seen (i + 1) 111 + else 112 + Error 113 + (Printf.sprintf 114 + "Expected whitespace or a semicolon but saw '%c' instead" c) 115 + | Semicolon_seen -> 116 + if is_whitespace c then parse Semicolon_seen (i + 1) 117 + else if is_token_char c then parse In_param_name (i + 1) 118 + else 119 + Error 120 + (Printf.sprintf 121 + "Expected whitespace or a token character but saw '%c' instead" 122 + c) 123 + | In_param_name -> 124 + if is_token_char c then parse In_param_name (i + 1) 125 + else if c = '=' then parse Equals_seen (i + 1) 126 + else 127 + Error 128 + (Printf.sprintf "Expected a token character or '=' but saw '%c' instead" 129 + c) 130 + | Equals_seen -> 131 + if c = '"' then parse In_quoted_string (i + 1) 132 + else if is_token_char c then parse In_unquoted_string (i + 1) 133 + else 134 + Error 135 + (Printf.sprintf 136 + "Expected a double quote or a token character but saw '%c' \ 137 + instead" 138 + c) 139 + | In_quoted_string -> 140 + if c = '\\' then parse In_quoted_pair (i + 1) 141 + else if c = '"' then parse Close_quote_seen (i + 1) 142 + else if is_qdtext_char c then parse In_quoted_string (i + 1) 143 + else 144 + Error 145 + (Printf.sprintf 146 + "Expected a non-control ASCII character but saw '%c' instead" c) 147 + | In_quoted_pair -> 148 + if Char.code c <= 127 then parse In_quoted_string (i + 1) 149 + else 150 + Error 151 + (Printf.sprintf "Expected an ASCII character but saw '%c' instead" 152 + c) 153 + | Close_quote_seen -> 154 + if c = ';' then parse Semicolon_seen (i + 1) 155 + else if is_whitespace c then parse Ws_before_semicolon (i + 1) 156 + else 157 + Error 158 + (Printf.sprintf 159 + "Expected a semicolon or whitespace but saw '%c' instead" c) 160 + | In_unquoted_string -> 161 + if is_token_char c then parse In_unquoted_string (i + 1) 162 + else if c = ';' then parse Semicolon_seen (i + 1) 163 + else if is_whitespace c then parse Ws_before_semicolon (i + 1) 164 + else 165 + Error 166 + (Printf.sprintf 167 + "Expected a token character, whitespace or a semicolon but saw \ 168 + '%c' instead" 169 + c) 170 + in 171 + parse At_start 0 172 + 173 + module Mime_type = struct 174 + let name = "MIME type" 175 + let validate = validate_mime_type 176 + 177 + let is_valid s = 178 + match validate s with 179 + | Ok () -> true 180 + | Error _ -> false 181 + end 182 + 183 + module Mime_type_list = struct 184 + let name = "MIME type list" 185 + 186 + let validate s = 187 + let s = String.trim s in 188 + if String.length s = 0 then Error "MIME type list must not be empty" 189 + else 190 + (* Split on commas and validate each MIME type *) 191 + let mime_types = String.split_on_char ',' s in 192 + let rec check_all = function 193 + | [] -> Ok () 194 + | mime :: rest -> ( 195 + let mime = String.trim mime in 196 + match validate_mime_type mime with 197 + | Ok () -> check_all rest 198 + | Error msg -> 199 + Error (Printf.sprintf "Invalid MIME type in list: %s" msg)) 200 + in 201 + check_all mime_types 202 + 203 + let is_valid s = 204 + match validate s with 205 + | Ok () -> true 206 + | Error _ -> false 207 + end 208 + 209 + let datatypes = 210 + [ (module Mime_type : Datatype.S); (module Mime_type_list : Datatype.S) ]
+32
lib/html5_checker/datatype/dt_mime.mli
··· 1 + (** MIME type datatype validators. 2 + 3 + This module provides validators for MIME types (media types) as defined 4 + by RFC 2045 and used in HTML5. *) 5 + 6 + (** MIME type validator. 7 + 8 + Validates a MIME type in the format: type/subtype[; parameters] 9 + 10 + Examples: 11 + - text/html 12 + - application/json 13 + - image/png 14 + - text/html; charset=utf-8 15 + 16 + Validation rules: 17 + - Must have a supertype (before /) and subtype (after /) 18 + - Supertype and subtype must be token characters 19 + - Optional semicolon-separated parameters 20 + - Parameters must be name=value pairs 21 + - Values can be quoted strings or tokens *) 22 + module Mime_type : Datatype.S 23 + 24 + (** MIME type list validator. 25 + 26 + Validates a comma-separated list of MIME types. 27 + Each MIME type in the list must be valid according to {!Mime_type} rules. 28 + This is used for the 'accept' attribute on input elements. *) 29 + module Mime_type_list : Datatype.S 30 + 31 + (** List of all datatypes defined in this module *) 32 + val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_popover.ml
··· 1 + (** Popover attribute validation for HTML5 *) 2 + 3 + module Popover = struct 4 + let name = "popover" 5 + 6 + let validate s = 7 + let s_lower = Datatype.string_to_ascii_lowercase s in 8 + match s_lower with 9 + | "" | "auto" | "manual" -> Ok () 10 + | _ -> 11 + Error 12 + (Printf.sprintf 13 + "The value '%s' is not a valid popover value. Expected 'auto', \ 14 + 'manual', or empty string." 15 + s) 16 + 17 + let is_valid s = Result.is_ok (validate s) 18 + end 19 + 20 + let datatypes = [ (module Popover : Datatype.S) ]
+22
lib/html5_checker/datatype/dt_popover.mli
··· 1 + (** Popover attribute datatype validator for HTML5. 2 + 3 + This module provides a validator for the popover attribute, as defined by 4 + the HTML5 specification. *) 5 + 6 + (** Popover attribute validator. 7 + 8 + Validates popover attribute values which can be: 9 + - "auto" - the popover can be light-dismissed (closed by clicking outside) 10 + - "manual" - the popover must be explicitly closed 11 + - "" (empty string) - equivalent to "auto" 12 + 13 + Values are case-insensitive. 14 + 15 + Examples: 16 + - "auto" 17 + - "manual" 18 + - "" *) 19 + module Popover : Datatype.S 20 + 21 + (** List of all datatypes defined in this module *) 22 + val datatypes : Datatype.t list
+22
lib/html5_checker/datatype/dt_preload.ml
··· 1 + (** Media preload attribute validation based on HTML5 spec *) 2 + 3 + (** Valid preload values *) 4 + let valid_preloads = [ "none"; "metadata"; "auto"; "" ] 5 + 6 + module Preload = struct 7 + let name = "preload" 8 + 9 + let validate s = 10 + let s_lower = Datatype.string_to_ascii_lowercase s in 11 + if List.mem s_lower valid_preloads then Ok () 12 + else 13 + Error 14 + (Printf.sprintf 15 + "The value '%s' is not a valid preload value. Expected one of: \ 16 + 'none', 'metadata', 'auto', or empty string." 17 + s) 18 + 19 + let is_valid s = Result.is_ok (validate s) 20 + end 21 + 22 + let datatypes = [ (module Preload : Datatype.S) ]
+24
lib/html5_checker/datatype/dt_preload.mli
··· 1 + (** Media preload attribute datatype validator. 2 + 3 + This module provides a validator for the preload attribute used on 4 + audio and video elements, as defined by the HTML5 specification. *) 5 + 6 + (** Media preload attribute validator. 7 + 8 + Validates media preload attribute values which can be: 9 + - none - No preloading (only load metadata when user starts playback) 10 + - metadata - Preload metadata only (dimensions, duration, etc.) 11 + - auto - Preload the entire resource if possible 12 + - "" (empty string) - Equivalent to auto 13 + 14 + Values are matched case-insensitively according to HTML5 spec. 15 + 16 + Examples: 17 + - "none" 18 + - "metadata" 19 + - "auto" 20 + - "" *) 21 + module Preload : Datatype.S 22 + 23 + (** List of all datatypes defined in this module *) 24 + val datatypes : Datatype.t list
+32
lib/html5_checker/datatype/dt_referrer.ml
··· 1 + (** Referrer policy attribute validation for HTML5 *) 2 + 3 + module Referrer_policy = struct 4 + let name = "referrerpolicy" 5 + 6 + let validate s = 7 + let s_lower = Datatype.string_to_ascii_lowercase s in 8 + match s_lower with 9 + | "" 10 + | "no-referrer" 11 + | "no-referrer-when-downgrade" 12 + | "origin" 13 + | "origin-when-cross-origin" 14 + | "same-origin" 15 + | "strict-origin" 16 + | "strict-origin-when-cross-origin" 17 + | "unsafe-url" -> 18 + Ok () 19 + | _ -> 20 + Error 21 + (Printf.sprintf 22 + "The value '%s' is not a valid referrerpolicy value. Expected \ 23 + one of: empty string, 'no-referrer', \ 24 + 'no-referrer-when-downgrade', 'origin', \ 25 + 'origin-when-cross-origin', 'same-origin', 'strict-origin', \ 26 + 'strict-origin-when-cross-origin', or 'unsafe-url'." 27 + s) 28 + 29 + let is_valid s = Result.is_ok (validate s) 30 + end 31 + 32 + let datatypes = [ (module Referrer_policy : Datatype.S) ]
+29
lib/html5_checker/datatype/dt_referrer.mli
··· 1 + (** Referrer policy attribute datatype validator. 2 + 3 + This module provides a validator for the referrerpolicy attribute used to 4 + control referrer information sent with requests, as defined by the HTML5 spec. *) 5 + 6 + (** Referrer policy attribute validator. 7 + 8 + Validates referrerpolicy attribute values which can be: 9 + - "" (empty string, uses default policy) 10 + - "no-referrer" (never send referrer) 11 + - "no-referrer-when-downgrade" (send referrer to same security level) 12 + - "origin" (send origin only) 13 + - "origin-when-cross-origin" (full URL for same-origin, origin for cross-origin) 14 + - "same-origin" (send referrer for same-origin only) 15 + - "strict-origin" (send origin for same security level) 16 + - "strict-origin-when-cross-origin" (full URL same-origin, origin cross-origin same security) 17 + - "unsafe-url" (always send full URL) 18 + 19 + Values are case-insensitive after ASCII lowercasing. 20 + 21 + Examples: 22 + - "" 23 + - "no-referrer" 24 + - "origin" 25 + - "strict-origin-when-cross-origin" *) 26 + module Referrer_policy : Datatype.S 27 + 28 + (** List of all datatypes defined in this module *) 29 + val datatypes : Datatype.t list
+53
lib/html5_checker/datatype/dt_sandbox.ml
··· 1 + (** Sandbox tokens validation *) 2 + 3 + (** Valid sandbox tokens (case-sensitive) *) 4 + let valid_sandbox_tokens = 5 + [ 6 + "allow-downloads"; 7 + "allow-forms"; 8 + "allow-modals"; 9 + "allow-orientation-lock"; 10 + "allow-pointer-lock"; 11 + "allow-popups"; 12 + "allow-popups-to-escape-sandbox"; 13 + "allow-presentation"; 14 + "allow-same-origin"; 15 + "allow-scripts"; 16 + "allow-top-navigation"; 17 + "allow-top-navigation-by-user-activation"; 18 + "allow-top-navigation-to-custom-protocols"; 19 + ] 20 + 21 + (** Validate sandbox attribute value *) 22 + let validate_sandbox s = 23 + let trimmed = Datatype.trim_html_spaces s in 24 + (* Empty value is valid (maximum restrictions) *) 25 + if trimmed = "" then Ok () 26 + else 27 + (* Split on whitespace *) 28 + let tokens = String.split_on_char ' ' trimmed in 29 + let tokens = List.filter (fun t -> Datatype.trim_html_spaces t <> "") tokens in 30 + if tokens = [] then Ok () (* All whitespace is like empty *) 31 + else 32 + (* Validate each token *) 33 + let rec check_tokens = function 34 + | [] -> Ok () 35 + | token :: rest -> 36 + (* Sandbox tokens are case-sensitive *) 37 + if List.mem token valid_sandbox_tokens then check_tokens rest 38 + else 39 + Error 40 + (Printf.sprintf 41 + "The value '%s' is not a valid sandbox token. Valid tokens \ 42 + are: %s" 43 + token (String.concat ", " valid_sandbox_tokens)) 44 + in 45 + check_tokens tokens 46 + 47 + module Sandbox = struct 48 + let name = "sandbox" 49 + let validate = validate_sandbox 50 + let is_valid s = Result.is_ok (validate s) 51 + end 52 + 53 + let datatypes = [ (module Sandbox : Datatype.S) ]
+37
lib/html5_checker/datatype/dt_sandbox.mli
··· 1 + (** Sandbox tokens validator. 2 + 3 + This module provides a validator for the sandbox attribute used on iframe 4 + elements, as defined by the HTML5 specification. *) 5 + 6 + (** Sandbox attribute validator. 7 + 8 + Validates sandbox attribute values which contain space-separated sandbox 9 + tokens. Each token enables a specific capability for the sandboxed iframe. 10 + 11 + Valid tokens: 12 + - allow-downloads: Allow downloads 13 + - allow-forms: Allow form submission 14 + - allow-modals: Allow modal dialogs (alert, confirm, etc.) 15 + - allow-orientation-lock: Allow orientation lock 16 + - allow-pointer-lock: Allow pointer lock 17 + - allow-popups: Allow popups (window.open, target="_blank", etc.) 18 + - allow-popups-to-escape-sandbox: Allow popups that don't inherit sandboxing 19 + - allow-presentation: Allow presentation sessions 20 + - allow-same-origin: Allow same-origin access 21 + - allow-scripts: Allow script execution 22 + - allow-top-navigation: Allow navigating top-level browsing context 23 + - allow-top-navigation-by-user-activation: Allow top navigation with user gesture 24 + - allow-top-navigation-to-custom-protocols: Allow top navigation to custom protocols 25 + 26 + Examples: 27 + - "" (empty = maximum restrictions) 28 + - "allow-scripts" 29 + - "allow-same-origin allow-scripts" 30 + - "allow-forms allow-popups allow-scripts" 31 + 32 + Tokens are case-sensitive and must match exactly. Duplicate tokens are 33 + allowed but redundant. An empty value means maximum sandbox restrictions. *) 34 + module Sandbox : Datatype.S 35 + 36 + (** List of all datatypes defined in this module *) 37 + val datatypes : Datatype.t list
+22
lib/html5_checker/datatype/dt_scope.ml
··· 1 + (** Table header scope attribute validation based on HTML5 spec *) 2 + 3 + (** Valid scope values *) 4 + let valid_scopes = [ "row"; "col"; "rowgroup"; "colgroup" ] 5 + 6 + module Scope = struct 7 + let name = "scope" 8 + 9 + let validate s = 10 + let s_lower = Datatype.string_to_ascii_lowercase s in 11 + if List.mem s_lower valid_scopes then Ok () 12 + else 13 + Error 14 + (Printf.sprintf 15 + "The value '%s' is not a valid scope value. Expected one of: %s." 16 + s 17 + (String.concat ", " valid_scopes)) 18 + 19 + let is_valid s = Result.is_ok (validate s) 20 + end 21 + 22 + let datatypes = [ (module Scope : Datatype.S) ]
+24
lib/html5_checker/datatype/dt_scope.mli
··· 1 + (** Table header scope attribute datatype validator. 2 + 3 + This module provides a validator for the scope attribute used on 4 + th elements, as defined by the HTML5 specification. *) 5 + 6 + (** Table header scope attribute validator. 7 + 8 + Validates th scope attribute values which can be: 9 + - row - Header cell applies to some of the subsequent cells in the same row(s) 10 + - col - Header cell applies to some of the subsequent cells in the same column(s) 11 + - rowgroup - Header cell applies to all remaining cells in the row group 12 + - colgroup - Header cell applies to all remaining cells in the column group 13 + 14 + Values are matched case-insensitively according to HTML5 spec. 15 + 16 + Examples: 17 + - "row" 18 + - "col" 19 + - "rowgroup" 20 + - "colgroup" *) 21 + module Scope : Datatype.S 22 + 23 + (** List of all datatypes defined in this module *) 24 + val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_shape.ml
··· 1 + (** Shape attribute validation for HTML5 *) 2 + 3 + module Shape = struct 4 + let name = "shape" 5 + 6 + let validate s = 7 + let s_lower = Datatype.string_to_ascii_lowercase s in 8 + match s_lower with 9 + | "default" | "rect" | "circle" | "poly" -> Ok () 10 + | _ -> 11 + Error 12 + (Printf.sprintf 13 + "The value '%s' is not a valid shape. Expected 'default', 'rect', \ 14 + 'circle', or 'poly'." 15 + s) 16 + 17 + let is_valid s = Result.is_ok (validate s) 18 + end 19 + 20 + let datatypes = [ (module Shape : Datatype.S) ]
+24
lib/html5_checker/datatype/dt_shape.mli
··· 1 + (** Shape attribute datatype validator for HTML5. 2 + 3 + This module provides a validator for the shape attribute used on area 4 + elements within image maps, as defined by the HTML5 specification. *) 5 + 6 + (** Shape attribute validator. 7 + 8 + Validates shape attribute values which can be: 9 + - "default" - entire region 10 + - "rect" - rectangular region 11 + - "circle" - circular region 12 + - "poly" - polygonal region 13 + 14 + Values are case-insensitive. 15 + 16 + Examples: 17 + - "rect" 18 + - "circle" 19 + - "poly" 20 + - "default" *) 21 + module Shape : Datatype.S 22 + 23 + (** List of all datatypes defined in this module *) 24 + val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_spellcheck.ml
··· 1 + (** Spellcheck attribute validation for HTML5 *) 2 + 3 + module Spellcheck = struct 4 + let name = "spellcheck" 5 + 6 + let validate s = 7 + let s_lower = Datatype.string_to_ascii_lowercase s in 8 + match s_lower with 9 + | "" | "true" | "false" -> Ok () 10 + | _ -> 11 + Error 12 + (Printf.sprintf 13 + "The value '%s' is not a valid spellcheck value. Expected 'true', \ 14 + 'false', or empty string." 15 + s) 16 + 17 + let is_valid s = Result.is_ok (validate s) 18 + end 19 + 20 + let datatypes = [ (module Spellcheck : Datatype.S) ]
+22
lib/html5_checker/datatype/dt_spellcheck.mli
··· 1 + (** Spellcheck attribute datatype validator for HTML5. 2 + 3 + This module provides a validator for the spellcheck attribute, as defined by 4 + the HTML5 specification. *) 5 + 6 + (** Spellcheck attribute validator. 7 + 8 + Validates spellcheck attribute values which can be: 9 + - "true" - spelling and grammar checking is enabled 10 + - "false" - spelling and grammar checking is disabled 11 + - "" (empty string) - default behavior (typically inherits from parent) 12 + 13 + Values are case-insensitive. 14 + 15 + Examples: 16 + - "true" 17 + - "false" 18 + - "" *) 19 + module Spellcheck : Datatype.S 20 + 21 + (** List of all datatypes defined in this module *) 22 + val datatypes : Datatype.t list
+142
lib/html5_checker/datatype/dt_srcset.ml
··· 1 + (** Image source set and sizes attribute validation *) 2 + 3 + (** Split string on commas, preserving parentheses groups *) 4 + let split_on_commas s = 5 + let len = String.length s in 6 + let rec find_splits paren_depth start i acc = 7 + if i >= len then 8 + if start < len then List.rev (String.sub s start (len - start) :: acc) 9 + else List.rev acc 10 + else 11 + match s.[i] with 12 + | '(' -> find_splits (paren_depth + 1) start (i + 1) acc 13 + | ')' -> find_splits (max 0 (paren_depth - 1)) start (i + 1) acc 14 + | ',' when paren_depth = 0 -> 15 + let part = String.sub s start (i - start) in 16 + find_splits 0 (i + 1) (i + 1) (part :: acc) 17 + | _ -> find_splits paren_depth start (i + 1) acc 18 + in 19 + find_splits 0 0 0 [] 20 + 21 + (** Parse a descriptor (width or pixel density) *) 22 + let parse_descriptor s = 23 + let trimmed = Datatype.trim_html_spaces s in 24 + let len = String.length trimmed in 25 + if len < 2 then None 26 + else 27 + let suffix = trimmed.[len - 1] in 28 + let num_part = String.sub trimmed 0 (len - 1) in 29 + match suffix with 30 + | 'w' -> 31 + (try 32 + let n = int_of_string num_part in 33 + if n > 0 then Some (`Width n) else None 34 + with _ -> None) 35 + | 'x' -> 36 + (try 37 + let f = float_of_string num_part in 38 + if f > 0.0 then Some (`Density f) else None 39 + with _ -> None) 40 + | _ -> None 41 + 42 + (** Validate a single image candidate *) 43 + let validate_image_candidate s = 44 + let trimmed = Datatype.trim_html_spaces s in 45 + if trimmed = "" then Error "Image candidate must not be empty" 46 + else 47 + (* Split on whitespace to get URL and optional descriptor *) 48 + let parts = String.split_on_char ' ' trimmed in 49 + let parts = List.filter (fun p -> Datatype.trim_html_spaces p <> "") parts in 50 + match parts with 51 + | [] -> Error "Image candidate must not be empty" 52 + | [ _url ] -> Ok None (* Just URL, no descriptor *) 53 + | [ _url; desc ] -> ( 54 + match parse_descriptor desc with 55 + | Some d -> Ok (Some d) 56 + | None -> 57 + Error 58 + (Printf.sprintf 59 + "Invalid descriptor '%s'. Must be a positive integer followed \ 60 + by 'w' or a positive number followed by 'x'" 61 + desc)) 62 + | _ -> 63 + Error 64 + "Image candidate must be a URL optionally followed by one descriptor" 65 + 66 + (** Validate srcset value *) 67 + let validate_srcset s = 68 + let trimmed = Datatype.trim_html_spaces s in 69 + if trimmed = "" then Error "Srcset must not be empty" else 70 + let candidates = split_on_commas trimmed in 71 + let candidates = List.filter (fun c -> Datatype.trim_html_spaces c <> "") candidates in 72 + if candidates = [] then Error "Srcset must contain at least one image candidate" 73 + else 74 + (* Validate each candidate and check for descriptor type consistency *) 75 + let rec check_candidates has_width has_density = function 76 + | [] -> Ok () 77 + | cand :: rest -> ( 78 + match validate_image_candidate cand with 79 + | Error e -> Error e 80 + | Ok None -> check_candidates has_width has_density rest 81 + | Ok (Some (`Width _)) -> 82 + if has_density then 83 + Error 84 + "Cannot mix width descriptors (w) and pixel density \ 85 + descriptors (x) in the same srcset" 86 + else check_candidates true has_density rest 87 + | Ok (Some (`Density _)) -> 88 + if has_width then 89 + Error 90 + "Cannot mix width descriptors (w) and pixel density \ 91 + descriptors (x) in the same srcset" 92 + else check_candidates has_width true rest) 93 + in 94 + check_candidates false false candidates 95 + 96 + module Srcset = struct 97 + let name = "srcset" 98 + let validate = validate_srcset 99 + let is_valid s = Result.is_ok (validate s) 100 + end 101 + 102 + (** Validate sizes attribute *) 103 + let validate_sizes s = 104 + let trimmed = Datatype.trim_html_spaces s in 105 + if trimmed = "" then Error "Sizes attribute must not be empty" 106 + else 107 + (* Split on commas *) 108 + let entries = split_on_commas trimmed in 109 + let entries = List.filter (fun e -> Datatype.trim_html_spaces e <> "") entries in 110 + if entries = [] then Error "Sizes attribute must contain at least one entry" 111 + else 112 + (* Each entry except the last should have a media condition 113 + The last entry is just a size value 114 + We do basic validation here *) 115 + let rec check_entries = function 116 + | [] -> Ok () 117 + | [ _last ] -> 118 + (* Last entry - just a size value, accept anything non-empty *) 119 + Ok () 120 + | entry :: rest -> 121 + let entry_trimmed = Datatype.trim_html_spaces entry in 122 + (* Check if it looks like it has a media condition (starts with '(') *) 123 + if String.length entry_trimmed = 0 then 124 + Error "Size entry must not be empty" 125 + else if entry_trimmed.[0] <> '(' then 126 + Error 127 + (Printf.sprintf 128 + "Size entry '%s' should start with a media condition in \ 129 + parentheses" 130 + entry_trimmed) 131 + else check_entries rest 132 + in 133 + check_entries entries 134 + 135 + module Sizes = struct 136 + let name = "sizes" 137 + let validate = validate_sizes 138 + let is_valid s = Result.is_ok (validate s) 139 + end 140 + 141 + let datatypes = 142 + [ (module Srcset : Datatype.S); (module Sizes : Datatype.S) ]
+44
lib/html5_checker/datatype/dt_srcset.mli
··· 1 + (** Image source set and sizes attribute validators. 2 + 3 + This module provides validators for srcset and sizes attributes used on 4 + img and source elements, as defined by the HTML5 specification. *) 5 + 6 + (** Srcset attribute validator. 7 + 8 + Validates srcset attribute values which contain comma-separated image 9 + candidates. Each image candidate consists of: 10 + - A URL 11 + - Optional whitespace followed by a width descriptor (e.g., "100w") or 12 + pixel density descriptor (e.g., "2x") 13 + 14 + Examples: 15 + - "image.jpg" 16 + - "image.jpg 1x" 17 + - "image-320.jpg 320w, image-640.jpg 640w" 18 + - "image-1x.jpg 1x, image-2x.jpg 2x" 19 + 20 + Width descriptors must be positive integers followed by 'w'. 21 + Pixel density descriptors must be positive numbers followed by 'x'. 22 + Cannot mix width and density descriptors in the same srcset. *) 23 + module Srcset : Datatype.S 24 + 25 + (** Sizes attribute validator. 26 + 27 + Validates sizes attribute values which contain comma-separated source size 28 + entries. Each entry (except the last) consists of: 29 + - A media condition 30 + - Whitespace 31 + - A source size value (length or "auto") 32 + 33 + The last entry is just a source size value (without media condition). 34 + 35 + Examples: 36 + - "100vw" 37 + - "(max-width: 600px) 100vw, 50vw" 38 + - "(min-width: 800px) 800px, 100vw" 39 + 40 + This validator performs basic syntax checking. *) 41 + module Sizes : Datatype.S 42 + 43 + (** List of all datatypes defined in this module *) 44 + val datatypes : Datatype.t list
+42
lib/html5_checker/datatype/dt_target.ml
··· 1 + (** Browsing context and target attribute validation *) 2 + 3 + (** Valid special target keywords (case-insensitive) *) 4 + let special_keywords = [ "_blank"; "_self"; "_parent"; "_top" ] 5 + 6 + (** Validate a browsing context name *) 7 + let validate_browsing_context s = 8 + if String.length s = 0 then Error "Browsing context name must not be empty" 9 + else if s.[0] = '_' then 10 + (* If starts with underscore, must be a special keyword *) 11 + let lower = Datatype.string_to_ascii_lowercase s in 12 + if List.mem lower special_keywords then Ok () 13 + else 14 + Error 15 + (Printf.sprintf 16 + "Browsing context name '%s' starts with underscore but is not one \ 17 + of the special keywords: %s" 18 + s (String.concat ", " special_keywords)) 19 + else Ok () 20 + 21 + module Browsing_context = struct 22 + let name = "browsing-context" 23 + let validate = validate_browsing_context 24 + let is_valid s = Result.is_ok (validate s) 25 + end 26 + 27 + (** Validate a target attribute value 28 + (For now, this is the same as browsing context validation) *) 29 + let validate_target s = 30 + if String.length s = 0 then Error "Target attribute must not be empty" 31 + else validate_browsing_context s 32 + 33 + module Target = struct 34 + let name = "target" 35 + let validate = validate_target 36 + let is_valid s = Result.is_ok (validate s) 37 + end 38 + 39 + let datatypes = 40 + [ 41 + (module Target : Datatype.S); (module Browsing_context : Datatype.S); 42 + ]
+50
lib/html5_checker/datatype/dt_target.mli
··· 1 + (** Browsing context and target attribute validators. 2 + 3 + This module provides validators for browsing context names and target 4 + attributes used on a, area, base, and form elements, as defined by the 5 + HTML5 specification. *) 6 + 7 + (** Target attribute validator. 8 + 9 + Validates target attribute values which specify where to display linked 10 + content or form responses. Valid values include: 11 + 12 + Special keywords (case-insensitive): 13 + - _blank: New window or tab 14 + - _self: Same frame (default) 15 + - _parent: Parent frame 16 + - _top: Top-level window 17 + 18 + Or a valid browsing context name: 19 + - Non-empty string 20 + - If starts with underscore, must be one of the special keywords above 21 + 22 + Examples: 23 + - "_blank" 24 + - "_self" 25 + - "myframe" 26 + - "content-frame" *) 27 + module Target : Datatype.S 28 + 29 + (** Browsing context name validator. 30 + 31 + Validates browsing context names used to identify frames and windows. 32 + A valid browsing context name is: 33 + - A non-empty string 34 + - If it starts with an underscore (_), it must be one of the special 35 + keywords: _blank, _self, _parent, or _top (case-insensitive) 36 + - Otherwise, any non-empty string is valid 37 + 38 + Examples: 39 + - "myframe" 40 + - "content" 41 + - "navigation-frame" 42 + - "_blank" (special keyword) 43 + 44 + Invalid examples: 45 + - "" (empty string) 46 + - "_custom" (underscore prefix but not a special keyword) *) 47 + module Browsing_context : Datatype.S 48 + 49 + (** List of all datatypes defined in this module *) 50 + val datatypes : Datatype.t list
+20
lib/html5_checker/datatype/dt_translate.ml
··· 1 + (** Translate attribute validation for HTML5 *) 2 + 3 + module Translate = struct 4 + let name = "translate" 5 + 6 + let validate s = 7 + let s_lower = Datatype.string_to_ascii_lowercase s in 8 + match s_lower with 9 + | "" | "yes" | "no" -> Ok () 10 + | _ -> 11 + Error 12 + (Printf.sprintf 13 + "The value '%s' is not a valid translate value. Expected 'yes', \ 14 + 'no', or empty string." 15 + s) 16 + 17 + let is_valid s = Result.is_ok (validate s) 18 + end 19 + 20 + let datatypes = [ (module Translate : Datatype.S) ]
+22
lib/html5_checker/datatype/dt_translate.mli
··· 1 + (** Translate attribute datatype validator for HTML5. 2 + 3 + This module provides a validator for the translate attribute, as defined by 4 + the HTML5 specification. *) 5 + 6 + (** Translate attribute validator. 7 + 8 + Validates translate attribute values which can be: 9 + - "yes" - the element should be translated 10 + - "no" - the element should not be translated 11 + - "" (empty string) - equivalent to "yes" 12 + 13 + Values are case-insensitive. 14 + 15 + Examples: 16 + - "yes" 17 + - "no" 18 + - "" *) 19 + module Translate : Datatype.S 20 + 21 + (** List of all datatypes defined in this module *) 22 + val datatypes : Datatype.t list
+124
lib/html5_checker/datatype/dt_url.ml
··· 1 + (** URL and IRI datatype validators for HTML5. *) 2 + 3 + (** Check if a character is valid in a URL scheme name. 4 + Scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) *) 5 + let is_scheme_char_initial = function 'a' .. 'z' | 'A' .. 'Z' -> true | _ -> false 6 + 7 + let is_scheme_char_subsequent = function 8 + | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '+' | '-' | '.' -> true 9 + | _ -> false 10 + 11 + (** Split a URL into scheme and remainder. 12 + Returns Some (scheme, rest) if a valid scheme is found, None otherwise. 13 + Skips leading HTML whitespace before checking for scheme. *) 14 + let split_scheme s = 15 + let len = String.length s in 16 + (* Skip leading HTML whitespace *) 17 + let rec skip_whitespace i = 18 + if i >= len then len 19 + else if Datatype.is_whitespace s.[i] then skip_whitespace (i + 1) 20 + else i 21 + in 22 + let start = skip_whitespace 0 in 23 + if start >= len then None 24 + else if not (is_scheme_char_initial s.[start]) then None 25 + else 26 + (* Look for scheme *) 27 + let rec find_colon i = 28 + if i >= len then None 29 + else 30 + match s.[i] with 31 + | ':' -> 32 + let scheme = 33 + String.sub s start (i - start) |> Datatype.string_to_ascii_lowercase 34 + in 35 + let rest = String.sub s (i + 1) (len - i - 1) in 36 + Some (scheme, rest) 37 + | c when is_scheme_char_subsequent c -> find_colon (i + 1) 38 + | _ -> None 39 + in 40 + find_colon (start + 1) 41 + 42 + (** Check if a scheme is well-known (http, https, ftp, mailto, file) *) 43 + let is_well_known_scheme = function 44 + | "http" | "https" | "ftp" | "mailto" | "file" -> true 45 + | _ -> false 46 + 47 + module Url : Datatype.S = struct 48 + let name = "URL" 49 + 50 + let validate s = 51 + let trimmed = Datatype.trim_html_spaces s in 52 + if trimmed = "" then Error "Must be non-empty." 53 + else 54 + (* Basic validation - check for control characters *) 55 + let len = String.length s in 56 + let rec check_chars i = 57 + if i >= len then Ok () 58 + else 59 + match s.[i] with 60 + | '\x00' .. '\x1F' | '\x7F' -> 61 + Error "URLs must not contain control characters." 62 + | _ -> check_chars (i + 1) 63 + in 64 + check_chars 0 65 + 66 + let is_valid s = Result.is_ok (validate s) 67 + end 68 + 69 + module Url_potentially_empty : Datatype.S = struct 70 + let name = "URL (potentially empty)" 71 + 72 + let validate s = 73 + let trimmed = Datatype.trim_html_spaces s in 74 + if trimmed = "" then Ok () 75 + else 76 + (* Use same validation as Url for non-empty values *) 77 + Url.validate s 78 + 79 + let is_valid s = Result.is_ok (validate s) 80 + end 81 + 82 + module Url_absolute : Datatype.S = struct 83 + let name = "absolute URL" 84 + 85 + let validate s = 86 + let trimmed = Datatype.trim_html_spaces s in 87 + if trimmed = "" then Error "Must be non-empty." 88 + else 89 + match split_scheme s with 90 + | None -> 91 + Error (Printf.sprintf "The string \"%s\" is not an absolute URL." s) 92 + | Some (scheme, _rest) -> 93 + if is_well_known_scheme scheme || String.length scheme > 0 then 94 + (* For well-known schemes, we could do more validation, 95 + but for now we just check that it has a scheme *) 96 + Ok () 97 + else Error "The string is not an absolute URL." 98 + 99 + let is_valid s = Result.is_ok (validate s) 100 + end 101 + 102 + module Iri : Datatype.S = struct 103 + let name = "absolute URL" 104 + 105 + (* IRI validation is the same as absolute URL validation *) 106 + let validate = Url_absolute.validate 107 + let is_valid s = Result.is_ok (validate s) 108 + end 109 + 110 + module Iri_ref : Datatype.S = struct 111 + let name = "URL" 112 + 113 + (* IRI reference validation is the same as URL validation *) 114 + let validate = Url.validate 115 + let is_valid s = Result.is_ok (validate s) 116 + end 117 + 118 + let datatypes = 119 + [ (module Url : Datatype.S) 120 + ; (module Url_potentially_empty : Datatype.S) 121 + ; (module Url_absolute : Datatype.S) 122 + ; (module Iri : Datatype.S) 123 + ; (module Iri_ref : Datatype.S) 124 + ]
+48
lib/html5_checker/datatype/dt_url.mli
··· 1 + (** URL and IRI datatype validators for HTML5. 2 + 3 + This module provides validators for URLs and Internationalized Resource 4 + Identifiers (IRIs) based on the Nu HTML Checker's implementation. 5 + 6 + The validators perform basic structural validation. Full URL parsing and 7 + validation according to WHATWG URL spec would require a complete URL parser. *) 8 + 9 + (** URL validator (IriRef in Nu validator). 10 + 11 + Validates URL references which can be either absolute or relative URLs. 12 + Basic validation ensures: 13 + - Non-empty after trimming HTML whitespace 14 + - No control characters 15 + - Basic structural correctness 16 + 17 + This corresponds to the general URL/IRI reference type. *) 18 + module Url : Datatype.S 19 + 20 + (** URL validator that allows empty values. 21 + 22 + Same as Url but permits empty strings. This is used for optional URL 23 + attributes. *) 24 + module Url_potentially_empty : Datatype.S 25 + 26 + (** Absolute URL validator (Iri in Nu validator). 27 + 28 + Validates that a URL is absolute (has a scheme). An absolute URL must: 29 + - Start with a valid scheme (e.g., http:, https:, ftp:, mailto:) 30 + - Not be empty 31 + - Follow URL structure rules 32 + 33 + Scheme format: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) *) 34 + module Url_absolute : Datatype.S 35 + 36 + (** IRI (Internationalized Resource Identifier) validator. 37 + 38 + Alias for Url_absolute. IRIs are the internationalized version of URIs, 39 + allowing Unicode characters. *) 40 + module Iri : Datatype.S 41 + 42 + (** IRI reference validator. 43 + 44 + Alias for Url. IRI references can be relative or absolute. *) 45 + module Iri_ref : Datatype.S 46 + 47 + (** List of all URL/IRI-related datatypes for registration. *) 48 + val datatypes : Datatype.t list
+22
lib/html5_checker/datatype/dt_wrap.ml
··· 1 + (** Textarea wrap attribute validation based on HTML5 spec *) 2 + 3 + (** Valid wrap values *) 4 + let valid_wraps = [ "soft"; "hard" ] 5 + 6 + module Wrap = struct 7 + let name = "wrap" 8 + 9 + let validate s = 10 + let s_lower = Datatype.string_to_ascii_lowercase s in 11 + if List.mem s_lower valid_wraps then Ok () 12 + else 13 + Error 14 + (Printf.sprintf 15 + "The value '%s' is not a valid wrap value. Expected one of: %s." 16 + s 17 + (String.concat ", " valid_wraps)) 18 + 19 + let is_valid s = Result.is_ok (validate s) 20 + end 21 + 22 + let datatypes = [ (module Wrap : Datatype.S) ]
+20
lib/html5_checker/datatype/dt_wrap.mli
··· 1 + (** Textarea wrap attribute datatype validator. 2 + 3 + This module provides a validator for the wrap attribute used on 4 + textarea elements, as defined by the HTML5 specification. *) 5 + 6 + (** Textarea wrap attribute validator. 7 + 8 + Validates textarea wrap attribute values which can be: 9 + - soft - Soft wrapping (line breaks not submitted, default) 10 + - hard - Hard wrapping (line breaks are submitted) 11 + 12 + Values are matched case-insensitively according to HTML5 spec. 13 + 14 + Examples: 15 + - "soft" 16 + - "hard" *) 17 + module Wrap : Datatype.S 18 + 19 + (** List of all datatypes defined in this module *) 20 + val datatypes : Datatype.t list
+94
lib/html5_checker/dom_walker.ml
··· 1 + (** DOM tree traversal for HTML5 conformance checking. *) 2 + 3 + (** Package a checker with its state for traversal. *) 4 + type checker_state = { 5 + start_element : 6 + name:string -> 7 + namespace:string option -> 8 + attrs:(string * string) list -> 9 + Message_collector.t -> 10 + unit; 11 + end_element : 12 + name:string -> namespace:string option -> Message_collector.t -> unit; 13 + characters : string -> Message_collector.t -> unit; 14 + end_document : Message_collector.t -> unit; 15 + } 16 + 17 + (** Create a checker state package from a first-class module. *) 18 + let make_checker_state (module C : Checker.S) = 19 + let state = C.create () in 20 + { 21 + start_element = (fun ~name ~namespace ~attrs collector -> 22 + C.start_element state ~name ~namespace ~attrs collector); 23 + end_element = (fun ~name ~namespace collector -> 24 + C.end_element state ~name ~namespace collector); 25 + characters = (fun text collector -> 26 + C.characters state text collector); 27 + end_document = (fun collector -> 28 + C.end_document state collector); 29 + } 30 + 31 + (** Walk a DOM node with a single checker state. *) 32 + let rec walk_node_single cs collector node = 33 + let open Html5rw.Dom in 34 + match node.name with 35 + | "#text" -> 36 + (* Text node: emit characters event *) 37 + cs.characters node.data collector 38 + | "#comment" -> 39 + (* Comment node: emit characters event with comment text *) 40 + cs.characters node.data collector 41 + | "#document" | "#document-fragment" -> 42 + (* Document/fragment nodes: just traverse children *) 43 + List.iter (walk_node_single cs collector) node.children 44 + | "!doctype" -> 45 + (* Doctype node: skip (no validation events for doctype) *) 46 + () 47 + | _ -> 48 + (* Element node: emit start, traverse children, emit end *) 49 + cs.start_element ~name:node.name ~namespace:node.namespace ~attrs:node.attrs collector; 50 + List.iter (walk_node_single cs collector) node.children; 51 + cs.end_element ~name:node.name ~namespace:node.namespace collector 52 + 53 + let walk checker collector node = 54 + let cs = make_checker_state checker in 55 + walk_node_single cs collector node; 56 + cs.end_document collector 57 + 58 + (** Walk a DOM node with multiple checker states. *) 59 + let rec walk_node_all css collector node = 60 + let open Html5rw.Dom in 61 + match node.name with 62 + | "#text" -> 63 + (* Text node: emit characters event to all checkers *) 64 + List.iter (fun cs -> cs.characters node.data collector) css 65 + | "#comment" -> 66 + (* Comment node: emit characters event with comment text to all checkers *) 67 + List.iter (fun cs -> cs.characters node.data collector) css 68 + | "#document" | "#document-fragment" -> 69 + (* Document/fragment nodes: just traverse children *) 70 + List.iter (walk_node_all css collector) node.children 71 + | "!doctype" -> 72 + (* Doctype node: skip *) 73 + () 74 + | _ -> 75 + (* Element node: emit start to all checkers, traverse children, emit end to all *) 76 + List.iter (fun cs -> 77 + cs.start_element ~name:node.name ~namespace:node.namespace ~attrs:node.attrs collector 78 + ) css; 79 + List.iter (walk_node_all css collector) node.children; 80 + List.iter (fun cs -> 81 + cs.end_element ~name:node.name ~namespace:node.namespace collector 82 + ) css 83 + 84 + let walk_all checkers collector node = 85 + (* Create checker state packages *) 86 + let css = List.map make_checker_state checkers in 87 + (* Traverse with all checkers *) 88 + walk_node_all css collector node; 89 + (* Call end_document on all checkers *) 90 + List.iter (fun cs -> cs.end_document collector) css 91 + 92 + let walk_registry registry collector node = 93 + let checkers = Checker_registry.all registry in 94 + walk_all checkers collector node
+197
lib/html5_checker/dom_walker.mli
··· 1 + (** DOM tree traversal for HTML5 conformance checking. 2 + 3 + This module provides functions to traverse DOM trees and apply checkers 4 + to validate HTML5 documents. It implements a depth-first, in-order 5 + traversal that visits every node in the tree and notifies checkers 6 + of traversal events. 7 + 8 + {2 Traversal Model} 9 + 10 + The walker follows a SAX-like event model, emitting events as it 11 + encounters different node types during traversal: 12 + 13 + {v 14 + Document 15 + └── html (start_element "html") 16 + ├── head (start_element "head") 17 + │ └── title (start_element "title") 18 + │ ├── #text "Page Title" (characters) 19 + │ └── (end_element "title") 20 + └── body (start_element "body") 21 + └── p (start_element "p") 22 + ├── #text "Hello " (characters) 23 + ├── b (start_element "b") 24 + │ ├── #text "world" (characters) 25 + │ └── (end_element "b") 26 + ├── #text "!" (characters) 27 + └── (end_element "p") 28 + end_document 29 + v} 30 + 31 + {2 Event Sequence} 32 + 33 + For each element node: 34 + 1. {!Checker.S.start_element} is called when entering the element 35 + 2. Children are recursively traversed 36 + 3. {!Checker.S.end_element} is called when exiting the element 37 + 38 + For text and comment nodes: 39 + - {!Checker.S.characters} is called with the text content 40 + 41 + After the entire tree is traversed: 42 + - {!Checker.S.end_document} is called on all checkers 43 + 44 + {2 Checker Coordination} 45 + 46 + When multiple checkers are used: 47 + - All checkers receive the same event sequence 48 + - Events are delivered to checkers in the order they appear in the list 49 + - Each checker maintains independent state 50 + - Messages from all checkers are collected together 51 + 52 + This allows composing orthogonal validation rules without interference. 53 + 54 + {2 Usage Examples} 55 + 56 + {b Single checker:} 57 + {[ 58 + let checker = Checker.noop () in 59 + let collector = Message_collector.create () in 60 + walk checker collector dom; 61 + let messages = Message_collector.messages collector in 62 + List.iter Message.pp messages 63 + ]} 64 + 65 + {b Multiple checkers:} 66 + {[ 67 + let checkers = [checker1; checker2; checker3] in 68 + let collector = Message_collector.create () in 69 + walk_all checkers collector dom; 70 + (* Analyze messages from all checkers *) 71 + ]} 72 + 73 + {b Registry of checkers:} 74 + {[ 75 + let registry = Checker_registry.default () in 76 + let collector = Message_collector.create () in 77 + walk_registry registry collector dom; 78 + (* All registered checkers have validated the DOM *) 79 + ]} *) 80 + 81 + (** {1 Single Checker Traversal} *) 82 + 83 + val walk : Checker.t -> Message_collector.t -> Html5rw.Dom.node -> unit 84 + (** [walk checker collector node] traverses a DOM tree with a single checker. 85 + 86 + @param checker The checker to apply during traversal 87 + @param collector The message collector for validation messages 88 + @param node The root node to start traversal from 89 + 90 + The traversal is depth-first and in-order: for each element, the 91 + checker receives a {!Checker.S.start_element} event, then children 92 + are recursively traversed, then an {!Checker.S.end_element} event 93 + is emitted. 94 + 95 + After the entire tree is traversed, {!Checker.S.end_document} is 96 + called to allow the checker to emit any final validation messages. 97 + 98 + {b Example:} 99 + {[ 100 + (* Validate a parsed HTML document *) 101 + let checker = Checker.noop () in 102 + let collector = Message_collector.create () in 103 + walk checker collector document_node; 104 + 105 + (* Check for errors *) 106 + let messages = Message_collector.messages collector in 107 + let errors = List.filter 108 + (fun msg -> msg.Message.severity = Message.Error) 109 + messages in 110 + if errors <> [] then 111 + Printf.printf "Found %d errors\n" (List.length errors) 112 + ]} 113 + 114 + {b Notes:} 115 + - Only element nodes trigger start/end events 116 + - Text and comment nodes trigger character events 117 + - Document and doctype nodes are silently skipped 118 + - The traversal follows document order (parent before children, 119 + earlier siblings before later ones) *) 120 + 121 + (** {1 Multiple Checker Traversal} *) 122 + 123 + val walk_all : 124 + Checker.t list -> Message_collector.t -> Html5rw.Dom.node -> unit 125 + (** [walk_all checkers collector node] traverses a DOM tree with multiple 126 + checkers. 127 + 128 + @param checkers List of checkers to apply during traversal 129 + @param collector The message collector for validation messages 130 + @param node The root node to start traversal from 131 + 132 + This performs a single tree traversal, delivering each event to all 133 + checkers in sequence. This is more efficient than calling {!walk} 134 + multiple times. 135 + 136 + All checkers receive events in the order they appear in the list. 137 + Each checker maintains independent state, so validation rules can 138 + be composed without interference. 139 + 140 + {b Example:} 141 + {[ 142 + (* Run multiple validation passes in one traversal *) 143 + let structure_checker = (module StructureChecker : Checker.S) in 144 + let attribute_checker = (module AttributeChecker : Checker.S) in 145 + let obsolete_checker = (module ObsoleteChecker : Checker.S) in 146 + 147 + let checkers = [structure_checker; attribute_checker; obsolete_checker] in 148 + let collector = Message_collector.create () in 149 + 150 + walk_all checkers collector document_node; 151 + 152 + (* All three checkers have validated the document *) 153 + let messages = Message_collector.messages collector in 154 + Message_format.print_messages messages 155 + ]} 156 + 157 + {b Empty list behavior:} 158 + If the checkers list is empty, the tree is traversed but no validation 159 + is performed. This is equivalent to calling [walk (Checker.noop ()) ...]. *) 160 + 161 + (** {1 Registry-Based Traversal} *) 162 + 163 + val walk_registry : 164 + Checker_registry.t -> Message_collector.t -> Html5rw.Dom.node -> unit 165 + (** [walk_registry registry collector node] traverses a DOM tree with all 166 + checkers from a registry. 167 + 168 + @param registry The registry containing checkers to apply 169 + @param collector The message collector for validation messages 170 + @param node The root node to start traversal from 171 + 172 + This is equivalent to: 173 + {[ 174 + let checkers = Checker_registry.all registry in 175 + walk_all checkers collector node 176 + ]} 177 + 178 + Use this when you want to run a pre-configured set of checkers 179 + without manually extracting them from the registry. 180 + 181 + {b Example:} 182 + {[ 183 + (* Set up registry with desired checkers *) 184 + let registry = Checker_registry.default () in 185 + Checker_registry.register registry "custom" my_checker; 186 + 187 + (* Validate multiple documents with same checker set *) 188 + List.iter (fun doc -> 189 + let collector = Message_collector.create () in 190 + walk_registry registry collector doc; 191 + report_results collector 192 + ) documents 193 + ]} 194 + 195 + {b Empty registry behavior:} 196 + If the registry is empty, the tree is traversed but no validation 197 + is performed. *)
+7
lib/html5_checker/dune
··· 1 + (include_subdirs unqualified) 2 + 3 + (library 4 + (name html5_checker) 5 + (public_name html5rw.checker) 6 + (libraries html5rw jsont jsont.bytesrw astring str) 7 + )
+78
lib/html5_checker/html5_checker.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: MIT 4 + ---------------------------------------------------------------------------*) 5 + 6 + module Message = Message 7 + module Message_collector = Message_collector 8 + module Message_format = Message_format 9 + module Parse_error_bridge = Parse_error_bridge 10 + module Content_category = Content_category 11 + module Content_model = Content_model 12 + module Attr_spec = Attr_spec 13 + module Element_spec = Element_spec 14 + 15 + type t = { 16 + doc : Html5rw.t; 17 + msgs : Message.t list; 18 + system_id : string option; 19 + } 20 + 21 + let check ?(collect_parse_errors = true) ?system_id reader = 22 + let doc = Html5rw.parse ~collect_errors:collect_parse_errors reader in 23 + let collector = Message_collector.create () in 24 + 25 + (* Add parse errors if collected *) 26 + if collect_parse_errors then begin 27 + let parse_errors = Parse_error_bridge.collect_parse_errors ?system_id doc in 28 + List.iter (Message_collector.add collector) parse_errors 29 + end; 30 + 31 + (* TODO: Run checkers via dom_walker when available *) 32 + (* Dom_walker.walk_registry registry (Html5rw.root doc) collector; *) 33 + 34 + { doc; msgs = Message_collector.messages collector; system_id } 35 + 36 + let check_dom ?(collect_parse_errors = true) ?system_id doc = 37 + let collector = Message_collector.create () in 38 + 39 + (* Add parse errors if requested *) 40 + if collect_parse_errors then begin 41 + let parse_errors = Parse_error_bridge.collect_parse_errors ?system_id doc in 42 + List.iter (Message_collector.add collector) parse_errors 43 + end; 44 + 45 + (* TODO: Run checkers via dom_walker when available *) 46 + (* Dom_walker.walk_registry registry (Html5rw.root doc) collector; *) 47 + 48 + { doc; msgs = Message_collector.messages collector; system_id } 49 + 50 + let messages t = t.msgs 51 + 52 + let errors t = 53 + List.filter 54 + (fun msg -> msg.Message.severity = Message.Error) 55 + t.msgs 56 + 57 + let warnings t = 58 + List.filter 59 + (fun msg -> msg.Message.severity = Message.Warning) 60 + t.msgs 61 + 62 + let has_errors t = 63 + List.exists 64 + (fun msg -> msg.Message.severity = Message.Error) 65 + t.msgs 66 + 67 + let document t = t.doc 68 + 69 + let system_id t = t.system_id 70 + 71 + let format_text t = 72 + Message_format.format_text ?system_id:t.system_id t.msgs 73 + 74 + let format_json t = 75 + Message_format.format_json ?system_id:t.system_id t.msgs 76 + 77 + let format_gnu t = 78 + Message_format.format_gnu ?system_id:t.system_id t.msgs
+102
lib/html5_checker/html5_checker.mli
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: MIT 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** HTML5 conformance checker. 7 + 8 + This module provides HTML5 validation and conformance checking, 9 + combining parse error detection with structural validation rules. *) 10 + 11 + (** {1 Re-exported modules} *) 12 + 13 + (** Validation message types and constructors. *) 14 + module Message = Message 15 + 16 + (** Message collection utilities. *) 17 + module Message_collector = Message_collector 18 + 19 + (** Message output formatters. *) 20 + module Message_format = Message_format 21 + 22 + (** Parse error bridge. *) 23 + module Parse_error_bridge = Parse_error_bridge 24 + 25 + (** {2 Content Model Framework} *) 26 + 27 + (** HTML5 content categories. *) 28 + module Content_category = Content_category 29 + 30 + (** HTML5 element content models. *) 31 + module Content_model = Content_model 32 + 33 + (** HTML5 attribute specifications. *) 34 + module Attr_spec = Attr_spec 35 + 36 + (** HTML5 element specifications. *) 37 + module Element_spec = Element_spec 38 + 39 + (** {1 Core Types} *) 40 + 41 + (** Result of checking an HTML document. *) 42 + type t 43 + 44 + (** {1 Checking Functions} *) 45 + 46 + (** Parse and validate HTML from a reader. 47 + 48 + This function parses the HTML input and optionally collects parse errors. 49 + Future versions will also run conformance checkers on the resulting DOM. 50 + 51 + @param collect_parse_errors If true, collect and include parse errors. Default: true. 52 + @param system_id Optional file path or URL for error reporting. 53 + @param reader Bytesrw reader containing HTML input. *) 54 + val check : 55 + ?collect_parse_errors:bool -> 56 + ?system_id:string -> 57 + Bytesrw.Bytes.Reader.t -> 58 + t 59 + 60 + (** Validate an already-parsed HTML document. 61 + 62 + This function takes an existing Html5rw.t parse result and validates it. 63 + 64 + @param collect_parse_errors If true, collect and include parse errors from the result. Default: true. 65 + @param system_id Optional file path or URL for error reporting. 66 + @param result Already-parsed HTML document. *) 67 + val check_dom : 68 + ?collect_parse_errors:bool -> 69 + ?system_id:string -> 70 + Html5rw.t -> 71 + t 72 + 73 + (** {1 Result Accessors} *) 74 + 75 + (** Get all validation messages. *) 76 + val messages : t -> Message.t list 77 + 78 + (** Get only error messages. *) 79 + val errors : t -> Message.t list 80 + 81 + (** Get only warning messages. *) 82 + val warnings : t -> Message.t list 83 + 84 + (** Check if there are any errors. *) 85 + val has_errors : t -> bool 86 + 87 + (** Get the underlying parsed document. *) 88 + val document : t -> Html5rw.t 89 + 90 + (** Get the system identifier if set. *) 91 + val system_id : t -> string option 92 + 93 + (** {1 Formatting} *) 94 + 95 + (** Format messages as human-readable text. *) 96 + val format_text : t -> string 97 + 98 + (** Format messages as JSON. *) 99 + val format_json : t -> string 100 + 101 + (** Format messages in GNU style. *) 102 + val format_gnu : t -> string
lib/html5_checker/message.cmi

This is a binary file and will not be displayed.

+80
lib/html5_checker/message.ml
··· 1 + type severity = Error | Warning | Info 2 + 3 + type location = { 4 + line : int; 5 + column : int; 6 + end_line : int option; 7 + end_column : int option; 8 + system_id : string option; 9 + } 10 + 11 + type t = { 12 + severity : severity; 13 + message : string; 14 + code : string option; 15 + location : location option; 16 + element : string option; 17 + attribute : string option; 18 + extract : string option; 19 + } 20 + 21 + let make ~severity ~message ?code ?location ?element ?attribute ?extract () = 22 + { severity; message; code; location; element; attribute; extract } 23 + 24 + let error ~message ?code ?location ?element ?attribute ?extract () = 25 + make ~severity:Error ~message ?code ?location ?element ?attribute ?extract () 26 + 27 + let warning ~message ?code ?location ?element ?attribute ?extract () = 28 + make ~severity:Warning ~message ?code ?location ?element ?attribute ?extract 29 + () 30 + 31 + let info ~message ?code ?location ?element ?attribute ?extract () = 32 + make ~severity:Info ~message ?code ?location ?element ?attribute ?extract () 33 + 34 + let make_location ~line ~column ?end_line ?end_column ?system_id () = 35 + { line; column; end_line; end_column; system_id } 36 + 37 + let severity_to_string = function 38 + | Error -> "error" 39 + | Warning -> "warning" 40 + | Info -> "info" 41 + 42 + let pp_severity fmt severity = 43 + Format.pp_print_string fmt (severity_to_string severity) 44 + 45 + let pp_location fmt loc = 46 + match loc.system_id with 47 + | Some sid -> Format.fprintf fmt "%s:" sid 48 + | None -> (); 49 + Format.fprintf fmt "%d:%d" loc.line loc.column; 50 + match (loc.end_line, loc.end_column) with 51 + | Some el, Some ec when el = loc.line && ec > loc.column -> 52 + Format.fprintf fmt "-%d" ec 53 + | Some el, Some ec when el > loc.line -> 54 + Format.fprintf fmt "-%d:%d" el ec 55 + | _ -> () 56 + 57 + let pp fmt msg = 58 + (match msg.location with 59 + | Some loc -> 60 + pp_location fmt loc; 61 + Format.fprintf fmt ": " 62 + | None -> ()); 63 + pp_severity fmt msg.severity; 64 + (match msg.code with 65 + | Some code -> Format.fprintf fmt " [%s]" code 66 + | None -> ()); 67 + Format.fprintf fmt ": %s" msg.message; 68 + (match msg.element with 69 + | Some elem -> Format.fprintf fmt " (element: %s)" elem 70 + | None -> ()); 71 + match msg.attribute with 72 + | Some attr -> Format.fprintf fmt " (attribute: %s)" attr 73 + | None -> () 74 + 75 + let to_string msg = 76 + let buf = Buffer.create 256 in 77 + let fmt = Format.formatter_of_buffer buf in 78 + pp fmt msg; 79 + Format.pp_print_flush fmt (); 80 + Buffer.contents buf
+104
lib/html5_checker/message.mli
··· 1 + (** HTML5 validation messages. 2 + 3 + This module provides types for validation messages including errors, 4 + warnings, and informational messages with source location tracking. *) 5 + 6 + (** Message severity levels. *) 7 + type severity = 8 + | Error (** Conformance error - document is invalid *) 9 + | Warning (** Conformance warning - likely problematic *) 10 + | Info (** Informational - suggestions for improvement *) 11 + 12 + (** Source location information. *) 13 + type location = { 14 + line : int; (** 1-indexed line number *) 15 + column : int; (** 1-indexed column number *) 16 + end_line : int option; (** Optional end line for ranges *) 17 + end_column : int option; (** Optional end column *) 18 + system_id : string option; (** File path or URL *) 19 + } 20 + 21 + (** A validation message. *) 22 + type t = { 23 + severity : severity; 24 + message : string; (** Human-readable description *) 25 + code : string option; (** Machine-readable error code *) 26 + location : location option; 27 + element : string option; (** Element name if relevant *) 28 + attribute : string option; (** Attribute name if relevant *) 29 + extract : string option; (** Source excerpt *) 30 + } 31 + 32 + (** {1 Constructors} *) 33 + 34 + (** Create a validation message with specified severity. *) 35 + val make : 36 + severity:severity -> 37 + message:string -> 38 + ?code:string -> 39 + ?location:location -> 40 + ?element:string -> 41 + ?attribute:string -> 42 + ?extract:string -> 43 + unit -> 44 + t 45 + 46 + (** Create an error message. *) 47 + val error : 48 + message:string -> 49 + ?code:string -> 50 + ?location:location -> 51 + ?element:string -> 52 + ?attribute:string -> 53 + ?extract:string -> 54 + unit -> 55 + t 56 + 57 + (** Create a warning message. *) 58 + val warning : 59 + message:string -> 60 + ?code:string -> 61 + ?location:location -> 62 + ?element:string -> 63 + ?attribute:string -> 64 + ?extract:string -> 65 + unit -> 66 + t 67 + 68 + (** Create an informational message. *) 69 + val info : 70 + message:string -> 71 + ?code:string -> 72 + ?location:location -> 73 + ?element:string -> 74 + ?attribute:string -> 75 + ?extract:string -> 76 + unit -> 77 + t 78 + 79 + (** Create a location record. *) 80 + val make_location : 81 + line:int -> 82 + column:int -> 83 + ?end_line:int -> 84 + ?end_column:int -> 85 + ?system_id:string -> 86 + unit -> 87 + location 88 + 89 + (** {1 Formatting} *) 90 + 91 + (** Convert severity to string representation. *) 92 + val severity_to_string : severity -> string 93 + 94 + (** Pretty-print severity. *) 95 + val pp_severity : Format.formatter -> severity -> unit 96 + 97 + (** Pretty-print location. *) 98 + val pp_location : Format.formatter -> location -> unit 99 + 100 + (** Pretty-print a validation message. *) 101 + val pp : Format.formatter -> t -> unit 102 + 103 + (** Convert a validation message to a string. *) 104 + val to_string : t -> string
+38
lib/html5_checker/message_collector.ml
··· 1 + type t = { mutable messages : Message.t list } 2 + 3 + let create () = { messages = [] } 4 + 5 + let add t msg = t.messages <- msg :: t.messages 6 + 7 + let add_error t ~message ?code ?location ?element ?attribute ?extract () = 8 + let msg = 9 + Message.error ~message ?code ?location ?element ?attribute ?extract () 10 + in 11 + add t msg 12 + 13 + let add_warning t ~message ?code ?location ?element ?attribute ?extract () = 14 + let msg = 15 + Message.warning ~message ?code ?location ?element ?attribute ?extract () 16 + in 17 + add t msg 18 + 19 + let messages t = List.rev t.messages 20 + 21 + let errors t = 22 + List.filter (fun msg -> msg.Message.severity = Message.Error) (messages t) 23 + 24 + let warnings t = 25 + List.filter (fun msg -> msg.Message.severity = Message.Warning) (messages t) 26 + 27 + let has_errors t = 28 + List.exists (fun msg -> msg.Message.severity = Message.Error) t.messages 29 + 30 + let count t = List.length t.messages 31 + 32 + let error_count t = 33 + List.fold_left 34 + (fun acc msg -> 35 + if msg.Message.severity = Message.Error then acc + 1 else acc) 36 + 0 t.messages 37 + 38 + let clear t = t.messages <- []
+65
lib/html5_checker/message_collector.mli
··· 1 + (** Message collector for accumulating validation messages. *) 2 + 3 + (** The type of a message collector. *) 4 + type t 5 + 6 + (** {1 Creation} *) 7 + 8 + (** Create a new empty message collector. *) 9 + val create : unit -> t 10 + 11 + (** {1 Adding Messages} *) 12 + 13 + (** Add a message to the collector. *) 14 + val add : t -> Message.t -> unit 15 + 16 + (** Add an error message to the collector. *) 17 + val add_error : 18 + t -> 19 + message:string -> 20 + ?code:string -> 21 + ?location:Message.location -> 22 + ?element:string -> 23 + ?attribute:string -> 24 + ?extract:string -> 25 + unit -> 26 + unit 27 + 28 + (** Add a warning message to the collector. *) 29 + val add_warning : 30 + t -> 31 + message:string -> 32 + ?code:string -> 33 + ?location:Message.location -> 34 + ?element:string -> 35 + ?attribute:string -> 36 + ?extract:string -> 37 + unit -> 38 + unit 39 + 40 + (** {1 Retrieving Messages} *) 41 + 42 + (** Get all messages in the order they were added. *) 43 + val messages : t -> Message.t list 44 + 45 + (** Get only error messages. *) 46 + val errors : t -> Message.t list 47 + 48 + (** Get only warning messages. *) 49 + val warnings : t -> Message.t list 50 + 51 + (** {1 Status Queries} *) 52 + 53 + (** Check if the collector contains any error messages. *) 54 + val has_errors : t -> bool 55 + 56 + (** Get the total number of messages. *) 57 + val count : t -> int 58 + 59 + (** Get the number of error messages. *) 60 + val error_count : t -> int 61 + 62 + (** {1 Modification} *) 63 + 64 + (** Clear all messages from the collector. *) 65 + val clear : t -> unit
+130
lib/html5_checker/message_format.ml
··· 1 + let format_text ?system_id messages = 2 + let buf = Buffer.create 1024 in 3 + List.iter 4 + (fun msg -> 5 + let loc_str = 6 + match msg.Message.location with 7 + | Some loc -> ( 8 + let sid = 9 + match loc.Message.system_id with 10 + | Some s -> s 11 + | None -> ( 12 + match system_id with Some s -> s | None -> "input") 13 + in 14 + let col_info = 15 + match (loc.end_line, loc.end_column) with 16 + | Some el, Some ec when el = loc.line && ec > loc.column -> 17 + Printf.sprintf "%d.%d-%d" loc.line loc.column ec 18 + | Some el, Some ec when el > loc.line -> 19 + Printf.sprintf "%d.%d-%d.%d" loc.line loc.column el ec 20 + | _ -> Printf.sprintf "%d.%d" loc.line loc.column 21 + in 22 + Printf.sprintf "%s:%s" sid col_info) 23 + | None -> ( 24 + match system_id with Some s -> s | None -> "input") 25 + in 26 + let severity_str = Message.severity_to_string msg.Message.severity in 27 + let code_str = 28 + match msg.Message.code with Some c -> " [" ^ c ^ "]" | None -> "" 29 + in 30 + let elem_str = 31 + match msg.Message.element with 32 + | Some e -> " (element: " ^ e ^ ")" 33 + | None -> "" 34 + in 35 + let attr_str = 36 + match msg.Message.attribute with 37 + | Some a -> " (attribute: " ^ a ^ ")" 38 + | None -> "" 39 + in 40 + Buffer.add_string buf 41 + (Printf.sprintf "%s: %s%s: %s%s%s\n" loc_str severity_str code_str 42 + msg.Message.message elem_str attr_str)) 43 + messages; 44 + Buffer.contents buf 45 + 46 + let format_gnu ?system_id messages = 47 + let buf = Buffer.create 1024 in 48 + List.iter 49 + (fun msg -> 50 + let loc_str = 51 + match msg.Message.location with 52 + | Some loc -> ( 53 + let sid = 54 + match loc.Message.system_id with 55 + | Some s -> s 56 + | None -> ( 57 + match system_id with Some s -> s | None -> "input") 58 + in 59 + Printf.sprintf "%s:%d:%d" sid loc.line loc.column) 60 + | None -> ( 61 + match system_id with Some s -> s ^ ":0:0" | None -> "input:0:0") 62 + in 63 + let severity_str = Message.severity_to_string msg.Message.severity in 64 + let code_str = 65 + match msg.Message.code with Some c -> " [" ^ c ^ "]" | None -> "" 66 + in 67 + Buffer.add_string buf 68 + (Printf.sprintf "%s: %s%s: %s\n" loc_str severity_str code_str 69 + msg.Message.message)) 70 + messages; 71 + Buffer.contents buf 72 + 73 + let message_to_json ?system_id msg = 74 + let open Jsont in 75 + let severity = String (Message.severity_to_string msg.Message.severity, Meta.none) in 76 + let message_text = String (msg.Message.message, Meta.none) in 77 + let base = [ (("type", Meta.none), severity); (("message", Meta.none), message_text) ] in 78 + let with_code = 79 + match msg.Message.code with 80 + | Some c -> (("subType", Meta.none), String (c, Meta.none)) :: base 81 + | None -> base 82 + in 83 + let with_location = 84 + match msg.Message.location with 85 + | Some loc -> 86 + let line = Number (float_of_int loc.Message.line, Meta.none) in 87 + let column = Number (float_of_int loc.Message.column, Meta.none) in 88 + let loc_fields = 89 + [ (("firstLine", Meta.none), line); (("firstColumn", Meta.none), column) ] 90 + in 91 + let loc_fields = 92 + match loc.Message.end_line with 93 + | Some el -> 94 + (("lastLine", Meta.none), Number (float_of_int el, Meta.none)) :: loc_fields 95 + | None -> loc_fields 96 + in 97 + let loc_fields = 98 + match loc.Message.end_column with 99 + | Some ec -> 100 + (("lastColumn", Meta.none), Number (float_of_int ec, Meta.none)) 101 + :: loc_fields 102 + | None -> loc_fields 103 + in 104 + let url = 105 + match loc.Message.system_id with 106 + | Some s -> s 107 + | None -> ( 108 + match system_id with Some s -> s | None -> "input") 109 + in 110 + (("url", Meta.none), String (url, Meta.none)) :: loc_fields @ with_code 111 + | None -> 112 + let url = 113 + match system_id with Some s -> s | None -> "input" 114 + in 115 + (("url", Meta.none), String (url, Meta.none)) :: with_code 116 + in 117 + let with_extract = 118 + match msg.Message.extract with 119 + | Some e -> (("extract", Meta.none), String (e, Meta.none)) :: with_location 120 + | None -> with_location 121 + in 122 + Object (with_extract, Meta.none) 123 + 124 + let format_json ?system_id messages = 125 + let open Jsont in 126 + let msg_array = Array (List.map (message_to_json ?system_id) messages, Meta.none) in 127 + let obj = Object ([ (("messages", Meta.none), msg_array) ], Meta.none) in 128 + match Jsont_bytesrw.encode_string ~format:Minify json obj with 129 + | Ok s -> s 130 + | Error e -> failwith ("JSON encoding error: " ^ e)
+28
lib/html5_checker/message_format.mli
··· 1 + (** Message output formatters. 2 + 3 + This module provides various output formats for validation messages, 4 + including text, JSON, and GNU-style formats for IDE integration. *) 5 + 6 + (** {1 Formatters} *) 7 + 8 + (** Format messages as human-readable text. 9 + 10 + Output format: [file:line:col: severity: message] 11 + 12 + @param system_id Optional default system identifier for messages without location. *) 13 + val format_text : ?system_id:string -> Message.t list -> string 14 + 15 + (** Format messages as JSON. 16 + 17 + Produces output compatible with the Nu HTML Validator JSON format. 18 + 19 + @param system_id Optional default system identifier for messages without location. *) 20 + val format_json : ?system_id:string -> Message.t list -> string 21 + 22 + (** Format messages in GNU style for IDE integration. 23 + 24 + Output format follows GNU conventions for error messages, compatible 25 + with most IDEs and editors. 26 + 27 + @param system_id Optional default system identifier for messages without location. *) 28 + val format_gnu : ?system_id:string -> Message.t list -> string
+22
lib/html5_checker/parse_error_bridge.ml
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: MIT 4 + ---------------------------------------------------------------------------*) 5 + 6 + let of_parse_error ?system_id err = 7 + let code = Html5rw.error_code err in 8 + let line = Html5rw.error_line err in 9 + let column = Html5rw.error_column err in 10 + let location = 11 + Message.make_location ~line ~column ?system_id () 12 + in 13 + let code_str = Html5rw.Parse_error_code.to_string code in 14 + Message.error 15 + ~message:(Printf.sprintf "Parse error: %s" code_str) 16 + ~code:code_str 17 + ~location 18 + () 19 + 20 + let collect_parse_errors ?system_id result = 21 + let errors = Html5rw.errors result in 22 + List.map (of_parse_error ?system_id) errors
+25
lib/html5_checker/parse_error_bridge.mli
··· 1 + (*--------------------------------------------------------------------------- 2 + Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 + SPDX-License-Identifier: MIT 4 + ---------------------------------------------------------------------------*) 5 + 6 + (** Bridge between Html5rw parse errors and validation messages. 7 + 8 + This module converts parse errors from the Html5rw parser into 9 + standardized validation messages. *) 10 + 11 + (** Convert a parse error to a validation message. 12 + 13 + Extracts error code, line, column, and creates a Message.t with 14 + severity set to Error. 15 + 16 + @param system_id Optional file path or URL to include in location *) 17 + val of_parse_error : ?system_id:string -> Html5rw.parse_error -> Message.t 18 + 19 + (** Collect all parse errors from a parse result. 20 + 21 + Extracts all parse errors from the Html5rw.t result and converts 22 + them to validation messages. 23 + 24 + @param system_id Optional file path or URL to include in locations *) 25 + val collect_parse_errors : ?system_id:string -> Html5rw.t -> Message.t list
+246
lib/html5_checker/semantic/form_checker.ml
··· 1 + (** Form-related validation checker implementation. *) 2 + 3 + type state = { 4 + mutable in_form : bool; 5 + (** Track if we're currently inside a <form> element *) 6 + mutable form_ids : string list; 7 + (** Stack of form IDs we're currently nested in *) 8 + mutable label_for_refs : string list; 9 + (** Collect all label[for] references to validate later *) 10 + mutable element_ids : string list; 11 + (** Collect all element IDs to validate label references *) 12 + mutable unlabeled_controls : (string * string option) list; 13 + (** Controls that might need labels: (type, id) *) 14 + } 15 + 16 + let create () = 17 + { 18 + in_form = false; 19 + form_ids = []; 20 + label_for_refs = []; 21 + element_ids = []; 22 + unlabeled_controls = []; 23 + } 24 + 25 + let reset state = 26 + state.in_form <- false; 27 + state.form_ids <- []; 28 + state.label_for_refs <- []; 29 + state.element_ids <- []; 30 + state.unlabeled_controls <- [] 31 + 32 + (** Check if an attribute list contains a specific attribute. *) 33 + let has_attr name attrs = 34 + List.exists (fun (attr_name, _) -> String.equal attr_name name) attrs 35 + 36 + (** Get the value of an attribute if present. *) 37 + let get_attr name attrs = 38 + List.find_map 39 + (fun (attr_name, value) -> 40 + if String.equal attr_name name then Some value else None) 41 + attrs 42 + 43 + (** Check if an element is labelable. *) 44 + let _is_labelable_element name input_type = 45 + match name with 46 + | "button" | "meter" | "output" | "progress" | "select" | "textarea" -> true 47 + | "input" -> ( 48 + match input_type with Some "hidden" -> false | _ -> true) 49 + | _ -> false 50 + 51 + (** Valid autocomplete tokens for various input types. *) 52 + let valid_autocomplete_tokens = 53 + [ 54 + "on"; 55 + "off"; 56 + "name"; 57 + "honorific-prefix"; 58 + "given-name"; 59 + "additional-name"; 60 + "family-name"; 61 + "honorific-suffix"; 62 + "nickname"; 63 + "email"; 64 + "username"; 65 + "new-password"; 66 + "current-password"; 67 + "one-time-code"; 68 + "organization-title"; 69 + "organization"; 70 + "street-address"; 71 + "address-line1"; 72 + "address-line2"; 73 + "address-line3"; 74 + "address-level4"; 75 + "address-level3"; 76 + "address-level2"; 77 + "address-level1"; 78 + "country"; 79 + "country-name"; 80 + "postal-code"; 81 + "cc-name"; 82 + "cc-given-name"; 83 + "cc-additional-name"; 84 + "cc-family-name"; 85 + "cc-number"; 86 + "cc-exp"; 87 + "cc-exp-month"; 88 + "cc-exp-year"; 89 + "cc-csc"; 90 + "cc-type"; 91 + "transaction-currency"; 92 + "transaction-amount"; 93 + "language"; 94 + "bday"; 95 + "bday-day"; 96 + "bday-month"; 97 + "bday-year"; 98 + "sex"; 99 + "tel"; 100 + "tel-country-code"; 101 + "tel-national"; 102 + "tel-area-code"; 103 + "tel-local"; 104 + "tel-extension"; 105 + "impp"; 106 + "url"; 107 + "photo"; 108 + ] 109 + 110 + let check_autocomplete_value value _input_type collector = 111 + (* Parse autocomplete value - can be space-separated tokens *) 112 + let tokens = String.split_on_char ' ' value |> List.map String.trim in 113 + let tokens = List.filter (fun s -> String.length s > 0) tokens in 114 + 115 + (* The last token should be a valid autocomplete token *) 116 + match List.rev tokens with 117 + | [] -> () 118 + | last_token :: _prefix_tokens -> 119 + if not (List.mem last_token valid_autocomplete_tokens) then 120 + Message_collector.add_warning collector 121 + ~message: 122 + (Printf.sprintf "Unknown autocomplete value: %s" last_token) 123 + ~code:"invalid-autocomplete-value" ~element:"input" 124 + ~attribute:"autocomplete" () 125 + 126 + let check_input_element state attrs collector = 127 + let input_type = get_attr "type" attrs in 128 + let id = get_attr "id" attrs in 129 + 130 + (* Track this input's ID if present *) 131 + (match id with 132 + | Some id_val -> state.element_ids <- id_val :: state.element_ids 133 + | None -> ()); 134 + 135 + (* Check various input-specific rules *) 136 + (match input_type with 137 + | Some "radio" | Some "checkbox" -> 138 + (* Radio and checkbox should have labels *) 139 + state.unlabeled_controls <- 140 + (Option.value input_type ~default:"text", id) 141 + :: state.unlabeled_controls 142 + | Some "submit" | Some "button" | Some "reset" -> 143 + (* These don't need labels *) 144 + () 145 + | _ -> ()); 146 + 147 + (* Check autocomplete attribute *) 148 + (match get_attr "autocomplete" attrs with 149 + | Some autocomplete_value -> 150 + check_autocomplete_value autocomplete_value input_type collector 151 + | None -> ()); 152 + 153 + (* Check for select multiple with size=1 *) 154 + () 155 + 156 + let check_select_element attrs collector = 157 + let multiple = has_attr "multiple" attrs in 158 + let size = get_attr "size" attrs in 159 + 160 + match (multiple, size) with 161 + | true, Some "1" -> 162 + Message_collector.add_warning collector 163 + ~message:"select element with multiple should not have size=\"1\"" 164 + ~code:"contradictory-attributes" ~element:"select" ~attribute:"size" 165 + () 166 + | _ -> () 167 + 168 + let check_button_element state attrs collector = 169 + (* button[type=submit] should be in form or have form attribute *) 170 + let button_type = get_attr "type" attrs in 171 + let has_form_attr = has_attr "form" attrs in 172 + 173 + match button_type with 174 + | Some "submit" | None -> 175 + (* Default type is submit *) 176 + if (not state.in_form) && not has_form_attr then 177 + Message_collector.add_warning collector 178 + ~message: 179 + "button element with type=\"submit\" should be inside a form or \ 180 + have form attribute" 181 + ~code:"submit-button-outside-form" ~element:"button" () 182 + | _ -> () 183 + 184 + let check_label_element state attrs _collector = 185 + (* Collect label[for] references *) 186 + match get_attr "for" attrs with 187 + | Some for_id -> state.label_for_refs <- for_id :: state.label_for_refs 188 + | None -> () 189 + 190 + let start_element state ~name ~namespace:_ ~attrs collector = 191 + (* Track element IDs *) 192 + (match get_attr "id" attrs with 193 + | Some id_val -> state.element_ids <- id_val :: state.element_ids 194 + | None -> ()); 195 + 196 + match name with 197 + | "form" -> 198 + state.in_form <- true; 199 + (match get_attr "id" attrs with 200 + | Some id -> state.form_ids <- id :: state.form_ids 201 + | None -> ()) 202 + | "input" -> check_input_element state attrs collector 203 + | "select" -> check_select_element attrs collector 204 + | "button" -> check_button_element state attrs collector 205 + | "label" -> check_label_element state attrs collector 206 + | _ -> () 207 + 208 + let end_element state ~name ~namespace:_ _collector = 209 + match name with 210 + | "form" -> 211 + state.in_form <- false; 212 + (match state.form_ids with 213 + | _ :: rest -> state.form_ids <- rest 214 + | [] -> ()) 215 + | _ -> () 216 + 217 + let characters _state _text _collector = () 218 + 219 + let end_document state collector = 220 + (* Validate label[for] references *) 221 + List.iter 222 + (fun for_id -> 223 + if not (List.mem for_id state.element_ids) then 224 + Message_collector.add_warning collector 225 + ~message: 226 + (Printf.sprintf 227 + "label element references non-existent ID: %s" for_id) 228 + ~code:"invalid-label-reference" ~element:"label" ~attribute:"for" 229 + ()) 230 + state.label_for_refs; 231 + 232 + (* Note: We can't reliably detect unlabeled controls without tracking 233 + label parent-child relationships, which would require more complex 234 + state tracking. For now, we just validate explicit label[for] references. *) 235 + () 236 + 237 + let checker = (module struct 238 + type nonrec state = state 239 + 240 + let create = create 241 + let reset = reset 242 + let start_element = start_element 243 + let end_element = end_element 244 + let characters = characters 245 + let end_document = end_document 246 + end : Checker.S)
+62
lib/html5_checker/semantic/form_checker.mli
··· 1 + (** Form-related validation checker. 2 + 3 + Validates form control associations, label references, and form structure 4 + according to HTML5 accessibility and semantic requirements. This checker 5 + ensures that: 6 + 7 + - Form controls have proper labels 8 + - Label associations are valid 9 + - Form attributes are correctly configured 10 + - Form structure follows HTML5 constraints 11 + 12 + {2 Validation Rules} 13 + 14 + {b Label Associations} 15 + - [label] with [for] attribute should reference a labelable element ID 16 + - [input\[type=radio\]] should have an associated visible label 17 + - [input\[type=checkbox\]] should have an associated visible label 18 + 19 + {b Form Control Validation} 20 + - [button\[type=submit\]] should be inside a [form] or have [form] attribute 21 + - [input\[type=image\]] requires [alt] attribute (validated by required_attr_checker) 22 + - [input\[type=hidden\]] should not have [required] attribute (validated by required_attr_checker) 23 + - [input\[type=file\]] should not have [value] attribute (validated by required_attr_checker) 24 + 25 + {b Autocomplete} 26 + - [autocomplete] values should be appropriate for the [input] type 27 + - Common autocomplete values include: [on], [off], [name], [email], 28 + [username], [current-password], [new-password], [street-address], etc. 29 + 30 + {b Select Elements} 31 + - [select\[multiple\]] should not have [size="1"] (contradictory) 32 + - [select] should contain at least one [option] or [optgroup] 33 + 34 + {b Accessibility} 35 + - Form controls should be reachable and operable via keyboard 36 + - Radio buttons with the same [name] should form a logical group 37 + 38 + {3 Labelable Elements} 39 + 40 + The following elements can be associated with a [label]: 41 + - [button] 42 + - [input] (except [type=hidden]) 43 + - [meter] 44 + - [output] 45 + - [progress] 46 + - [select] 47 + - [textarea] 48 + 49 + @see <https://html.spec.whatwg.org/multipage/forms.html> WHATWG HTML: Forms 50 + @see <https://www.w3.org/WAI/WCAG21/Understanding/labels-or-instructions.html> 51 + WCAG: Labels or Instructions *) 52 + 53 + include Checker.S 54 + 55 + val checker : Checker.t 56 + (** A first-class module instance of this checker. 57 + 58 + {b Usage:} 59 + {[ 60 + let checker = Form_checker.checker in 61 + Checker_registry.register "form-validation" checker 62 + ]} *)
+219
lib/html5_checker/semantic/id_checker.ml
··· 1 + (** ID uniqueness and reference checker. 2 + 3 + This checker validates that: 4 + - ID attributes are unique within the document 5 + - ID references point to existing IDs 6 + - ID values conform to HTML5 requirements *) 7 + 8 + (** Location information for ID occurrences. *) 9 + type id_location = { 10 + element : string; 11 + location : Message.location option; 12 + } 13 + 14 + (** Information about an ID reference. *) 15 + type id_reference = { 16 + referring_element : string; 17 + attribute : string; 18 + referenced_id : string; 19 + location : Message.location option; 20 + } 21 + 22 + (** Checker state tracking IDs and references. *) 23 + type state = { 24 + ids : (string, id_location) Hashtbl.t; 25 + mutable references : id_reference list; 26 + } 27 + 28 + let create () = 29 + { 30 + ids = Hashtbl.create 64; 31 + references = []; 32 + } 33 + 34 + let reset state = 35 + Hashtbl.clear state.ids; 36 + state.references <- [] 37 + 38 + (** Check if a string contains whitespace. *) 39 + let contains_whitespace s = 40 + String.contains s ' ' || String.contains s '\t' || 41 + String.contains s '\n' || String.contains s '\r' 42 + 43 + (** Extract ID from a usemap value (removes leading #). *) 44 + let extract_usemap_id value = 45 + if String.length value > 0 && value.[0] = '#' then 46 + Some (String.sub value 1 (String.length value - 1)) 47 + else 48 + None 49 + 50 + (** Split whitespace-separated ID references. *) 51 + let split_ids value = 52 + let rec split acc start i = 53 + if i >= String.length value then 54 + if i > start then 55 + (String.sub value start (i - start)) :: acc 56 + else 57 + acc 58 + else 59 + match value.[i] with 60 + | ' ' | '\t' | '\n' | '\r' -> 61 + let acc' = 62 + if i > start then 63 + (String.sub value start (i - start)) :: acc 64 + else 65 + acc 66 + in 67 + split acc' (i + 1) (i + 1) 68 + | _ -> 69 + split acc start (i + 1) 70 + in 71 + List.rev (split [] 0 0) 72 + 73 + (** Attributes that reference a single ID. *) 74 + let single_id_ref_attrs = [ 75 + "for"; (* label *) 76 + "form"; (* form-associated elements *) 77 + "list"; (* input *) 78 + "aria-activedescendant"; 79 + ] 80 + 81 + (** Attributes that reference multiple IDs (space-separated). *) 82 + let multi_id_ref_attrs = [ 83 + "headers"; (* td, th *) 84 + "aria-labelledby"; 85 + "aria-describedby"; 86 + "aria-controls"; 87 + "aria-flowto"; 88 + "aria-owns"; 89 + "itemref"; 90 + ] 91 + 92 + (** Check and store an ID attribute. *) 93 + let check_id state ~element ~id ~location collector = 94 + (* Check for empty ID *) 95 + if String.length id = 0 then 96 + Message_collector.add_error collector 97 + ~message:"ID attribute must not be empty" 98 + ~code:"empty-id" 99 + ?location 100 + ~element 101 + ~attribute:"id" 102 + () 103 + (* Check for whitespace in ID *) 104 + else if contains_whitespace id then 105 + Message_collector.add_error collector 106 + ~message:(Printf.sprintf "ID attribute value '%s' must not contain whitespace" id) 107 + ~code:"id-whitespace" 108 + ?location 109 + ~element 110 + ~attribute:"id" 111 + () 112 + (* Check for duplicate ID *) 113 + else if Hashtbl.mem state.ids id then 114 + let first_occurrence = Hashtbl.find state.ids id in 115 + let first_loc_str = match first_occurrence.location with 116 + | None -> "" 117 + | Some loc -> Printf.sprintf " at line %d, column %d" loc.line loc.column 118 + in 119 + Message_collector.add_error collector 120 + ~message:(Printf.sprintf 121 + "Duplicate ID '%s': first used on <%s>%s, now on <%s>" 122 + id first_occurrence.element first_loc_str element) 123 + ~code:"duplicate-id" 124 + ?location 125 + ~element 126 + ~attribute:"id" 127 + () 128 + else 129 + (* Store the ID *) 130 + Hashtbl.add state.ids id { element; location } 131 + 132 + (** Record a single ID reference. *) 133 + let add_reference state ~referring_element ~attribute ~referenced_id ~location = 134 + if String.length referenced_id > 0 then 135 + state.references <- { 136 + referring_element; 137 + attribute; 138 + referenced_id; 139 + location; 140 + } :: state.references 141 + 142 + (** Process attributes to check IDs and collect references. *) 143 + let process_attrs state ~element ~attrs ~location collector = 144 + List.iter (fun (name, value) -> 145 + match name with 146 + | "id" -> 147 + check_id state ~element ~id:value ~location collector 148 + 149 + | "usemap" -> 150 + (* usemap references a map name, which is like an ID reference *) 151 + begin match extract_usemap_id value with 152 + | Some id -> 153 + add_reference state ~referring_element:element 154 + ~attribute:name ~referenced_id:id ~location 155 + | None -> 156 + if String.length value > 0 then 157 + Message_collector.add_error collector 158 + ~message:(Printf.sprintf 159 + "usemap attribute value '%s' must start with '#'" value) 160 + ~code:"invalid-usemap" 161 + ?location 162 + ~element 163 + ~attribute:name 164 + () 165 + end 166 + 167 + | attr when List.mem attr single_id_ref_attrs -> 168 + add_reference state ~referring_element:element 169 + ~attribute:attr ~referenced_id:value ~location 170 + 171 + | attr when List.mem attr multi_id_ref_attrs -> 172 + (* Split space-separated IDs and add each as a reference *) 173 + let ids = split_ids value in 174 + List.iter (fun id -> 175 + add_reference state ~referring_element:element 176 + ~attribute:attr ~referenced_id:id ~location 177 + ) ids 178 + 179 + | _ -> () 180 + ) attrs 181 + 182 + let start_element state ~name ~namespace:_ ~attrs collector = 183 + (* For now, we don't have location information from the DOM walker, 184 + so we pass None. In a full implementation, this would be passed 185 + from the parser. *) 186 + let location = None in 187 + process_attrs state ~element:name ~attrs ~location collector 188 + 189 + let end_element _state ~name:_ ~namespace:_ _collector = 190 + () 191 + 192 + let characters _state _text _collector = 193 + () 194 + 195 + let end_document state collector = 196 + (* Check all references point to existing IDs *) 197 + List.iter (fun ref -> 198 + if not (Hashtbl.mem state.ids ref.referenced_id) then 199 + Message_collector.add_error collector 200 + ~message:(Printf.sprintf 201 + "The '%s' attribute on <%s> refers to ID '%s' which does not exist" 202 + ref.attribute ref.referring_element ref.referenced_id) 203 + ~code:"dangling-id-reference" 204 + ?location:ref.location 205 + ~element:ref.referring_element 206 + ~attribute:ref.attribute 207 + () 208 + ) state.references 209 + 210 + let checker = (module struct 211 + type nonrec state = state 212 + 213 + let create = create 214 + let reset = reset 215 + let start_element = start_element 216 + let end_element = end_element 217 + let characters = characters 218 + let end_document = end_document 219 + end : Checker.S)
+11
lib/html5_checker/semantic/id_checker.mli
··· 1 + (** ID uniqueness and reference checker. 2 + 3 + Validates: 4 + - ID attributes are unique within the document 5 + - ID references (for, headers, aria-*, etc.) point to existing IDs 6 + - ID values conform to HTML5 requirements *) 7 + 8 + include Checker.S 9 + 10 + val checker : Checker.t 11 + (** [checker] is a checker instance for validating ID uniqueness and references. *)
+296
lib/html5_checker/semantic/nesting_checker.ml
··· 1 + (** Interactive element nesting checker implementation. *) 2 + 3 + (** Special ancestors that need tracking for nesting validation. 4 + 5 + This array defines the elements whose presence in the ancestor chain 6 + affects validation of descendant elements. The order is significant 7 + as it determines bit positions in the ancestor bitmask. *) 8 + let special_ancestors = 9 + [| "a"; "address"; "body"; "button"; "caption"; "dfn"; "dt"; "figcaption"; 10 + "figure"; "footer"; "form"; "header"; "label"; "map"; "noscript"; "th"; 11 + "time"; "progress"; "meter"; "article"; "section"; "aside"; "nav"; "h1"; 12 + "h2"; "h3"; "h4"; "h5"; "h6" |] 13 + 14 + (** Get the bit position for a special ancestor element. 15 + Returns [-1] if the element is not a special ancestor. *) 16 + let special_ancestor_number name = 17 + let rec find i = 18 + if i >= Array.length special_ancestors then -1 19 + else if special_ancestors.(i) = name then i 20 + else find (i + 1) 21 + in 22 + find 0 23 + 24 + (** Interactive elements that cannot be nested inside [a] or [button]. *) 25 + let interactive_elements = 26 + [| "a"; "button"; "details"; "embed"; "iframe"; "label"; "select"; 27 + "textarea" |] 28 + 29 + (** Map from descendant element name to bitmask of prohibited ancestors. *) 30 + let ancestor_mask_by_descendant : (string, int) Hashtbl.t = 31 + Hashtbl.create 64 32 + 33 + (** Register that [ancestor] is prohibited for [descendant]. *) 34 + let register_prohibited_ancestor ancestor descendant = 35 + let number = special_ancestor_number ancestor in 36 + if number = -1 then 37 + failwith ("Ancestor not found in array: " ^ ancestor); 38 + let mask = 39 + match Hashtbl.find_opt ancestor_mask_by_descendant descendant with 40 + | None -> 0 41 + | Some m -> m 42 + in 43 + let new_mask = mask lor (1 lsl number) in 44 + Hashtbl.replace ancestor_mask_by_descendant descendant new_mask 45 + 46 + (** Initialize the prohibited ancestor map. *) 47 + let () = 48 + (* Self-nesting restrictions *) 49 + register_prohibited_ancestor "form" "form"; 50 + register_prohibited_ancestor "progress" "progress"; 51 + register_prohibited_ancestor "meter" "meter"; 52 + register_prohibited_ancestor "dfn" "dfn"; 53 + register_prohibited_ancestor "noscript" "noscript"; 54 + register_prohibited_ancestor "label" "label"; 55 + 56 + (* Address restrictions *) 57 + register_prohibited_ancestor "address" "address"; 58 + register_prohibited_ancestor "address" "section"; 59 + register_prohibited_ancestor "address" "nav"; 60 + register_prohibited_ancestor "address" "article"; 61 + register_prohibited_ancestor "address" "header"; 62 + register_prohibited_ancestor "address" "footer"; 63 + register_prohibited_ancestor "address" "h1"; 64 + register_prohibited_ancestor "address" "h2"; 65 + register_prohibited_ancestor "address" "h3"; 66 + register_prohibited_ancestor "address" "h4"; 67 + register_prohibited_ancestor "address" "h5"; 68 + register_prohibited_ancestor "address" "h6"; 69 + 70 + (* Header/footer restrictions *) 71 + register_prohibited_ancestor "header" "header"; 72 + register_prohibited_ancestor "footer" "header"; 73 + register_prohibited_ancestor "header" "footer"; 74 + register_prohibited_ancestor "footer" "footer"; 75 + 76 + (* dt restrictions *) 77 + register_prohibited_ancestor "dt" "header"; 78 + register_prohibited_ancestor "dt" "footer"; 79 + register_prohibited_ancestor "dt" "article"; 80 + register_prohibited_ancestor "dt" "nav"; 81 + register_prohibited_ancestor "dt" "section"; 82 + register_prohibited_ancestor "dt" "h1"; 83 + register_prohibited_ancestor "dt" "h2"; 84 + register_prohibited_ancestor "dt" "h3"; 85 + register_prohibited_ancestor "dt" "h4"; 86 + register_prohibited_ancestor "dt" "h5"; 87 + register_prohibited_ancestor "dt" "h6"; 88 + register_prohibited_ancestor "dt" "hgroup"; 89 + 90 + (* th restrictions *) 91 + register_prohibited_ancestor "th" "header"; 92 + register_prohibited_ancestor "th" "footer"; 93 + register_prohibited_ancestor "th" "article"; 94 + register_prohibited_ancestor "th" "nav"; 95 + register_prohibited_ancestor "th" "section"; 96 + register_prohibited_ancestor "th" "h1"; 97 + register_prohibited_ancestor "th" "h2"; 98 + register_prohibited_ancestor "th" "h3"; 99 + register_prohibited_ancestor "th" "h4"; 100 + register_prohibited_ancestor "th" "h5"; 101 + register_prohibited_ancestor "th" "h6"; 102 + register_prohibited_ancestor "th" "hgroup"; 103 + 104 + (* Caption restriction *) 105 + register_prohibited_ancestor "caption" "table"; 106 + 107 + (* Interactive element restrictions: cannot be inside a or button *) 108 + Array.iter (fun elem -> 109 + register_prohibited_ancestor "a" elem; 110 + register_prohibited_ancestor "button" elem 111 + ) interactive_elements 112 + 113 + (** Bitmask constants for common checks. *) 114 + let a_button_mask = 115 + let a_num = special_ancestor_number "a" in 116 + let button_num = special_ancestor_number "button" in 117 + (1 lsl a_num) lor (1 lsl button_num) 118 + 119 + let map_mask = 120 + let map_num = special_ancestor_number "map" in 121 + 1 lsl map_num 122 + 123 + (** Stack node representing an element's context. *) 124 + type stack_node = { 125 + ancestor_mask : int; 126 + _name : string; [@warning "-69"] 127 + } 128 + 129 + (** Checker state. *) 130 + type state = { 131 + mutable stack : stack_node list; 132 + mutable ancestor_mask : int; 133 + } 134 + 135 + let create () = 136 + { stack = []; ancestor_mask = 0 } 137 + 138 + let reset state = 139 + state.stack <- []; 140 + state.ancestor_mask <- 0 141 + 142 + (** Get attribute value by name from attribute list. *) 143 + let get_attr attrs name = 144 + List.assoc_opt name attrs 145 + 146 + (** Check if an attribute exists. *) 147 + let has_attr attrs name = 148 + get_attr attrs name <> None 149 + 150 + (** Check if element is interactive based on its attributes. *) 151 + let is_interactive_element name attrs = 152 + match name with 153 + | "a" -> 154 + has_attr attrs "href" 155 + | "audio" | "video" -> 156 + has_attr attrs "controls" 157 + | "img" | "object" -> 158 + has_attr attrs "usemap" 159 + | "input" -> 160 + begin match get_attr attrs "type" with 161 + | Some "hidden" -> false 162 + | _ -> true 163 + end 164 + | "button" | "details" | "embed" | "iframe" | "label" | "select" 165 + | "textarea" -> 166 + true 167 + | _ -> 168 + false 169 + 170 + (** Get a human-readable description of an element for error messages. *) 171 + let element_description name attrs = 172 + match name with 173 + | "a" when has_attr attrs "href" -> 174 + "The element \"a\" with the attribute \"href\"" 175 + | "audio" when has_attr attrs "controls" -> 176 + "The element \"audio\" with the attribute \"controls\"" 177 + | "video" when has_attr attrs "controls" -> 178 + "The element \"video\" with the attribute \"controls\"" 179 + | "img" when has_attr attrs "usemap" -> 180 + "The element \"img\" with the attribute \"usemap\"" 181 + | "object" when has_attr attrs "usemap" -> 182 + "The element \"object\" with the attribute \"usemap\"" 183 + | _ -> 184 + Printf.sprintf "The element \"%s\"" name 185 + 186 + (** Report nesting violations. *) 187 + let check_nesting state name attrs collector = 188 + (* Compute the prohibited ancestor mask for this element *) 189 + let base_mask = 190 + match Hashtbl.find_opt ancestor_mask_by_descendant name with 191 + | Some m -> m 192 + | None -> 0 193 + in 194 + 195 + (* Add interactive element restrictions if applicable *) 196 + let mask = 197 + if is_interactive_element name attrs then 198 + base_mask lor a_button_mask 199 + else 200 + base_mask 201 + in 202 + 203 + (* Check for violations *) 204 + if mask <> 0 then begin 205 + let mask_hit = state.ancestor_mask land mask in 206 + if mask_hit <> 0 then begin 207 + let desc = element_description name attrs in 208 + (* Find which ancestors are violated *) 209 + Array.iteri (fun i ancestor -> 210 + let bit = 1 lsl i in 211 + if (mask_hit land bit) <> 0 then 212 + Message_collector.add_error collector 213 + ~message:(Printf.sprintf 214 + "%s must not appear as a descendant of the \"%s\" element." 215 + desc ancestor) 216 + ~element:name 217 + () 218 + ) special_ancestors 219 + end 220 + end 221 + 222 + (** Check for required ancestors. *) 223 + let check_required_ancestors state name collector = 224 + match name with 225 + | "area" -> 226 + if (state.ancestor_mask land map_mask) = 0 then 227 + Message_collector.add_error collector 228 + ~message:"The \"area\" element must have a \"map\" ancestor." 229 + ~element:name 230 + () 231 + | _ -> () 232 + 233 + let start_element state ~name ~namespace ~attrs collector = 234 + (* Only check HTML elements, not SVG or MathML *) 235 + match namespace with 236 + | Some _ -> () 237 + | None -> 238 + (* Check for nesting violations *) 239 + check_nesting state name attrs collector; 240 + check_required_ancestors state name collector; 241 + 242 + (* Update ancestor mask if this is a special ancestor *) 243 + let new_mask = state.ancestor_mask in 244 + let number = special_ancestor_number name in 245 + let new_mask = 246 + if number >= 0 then 247 + new_mask lor (1 lsl number) 248 + else 249 + new_mask 250 + in 251 + 252 + (* Add href tracking for <a> elements *) 253 + let new_mask = 254 + if name = "a" && has_attr attrs "href" then 255 + let a_num = special_ancestor_number "a" in 256 + new_mask lor (1 lsl a_num) 257 + else 258 + new_mask 259 + in 260 + 261 + (* Push onto stack *) 262 + let node = { ancestor_mask = state.ancestor_mask; _name = name } in 263 + state.stack <- node :: state.stack; 264 + state.ancestor_mask <- new_mask 265 + 266 + let end_element state ~name:_ ~namespace _collector = 267 + (* Only track HTML elements *) 268 + match namespace with 269 + | Some _ -> () 270 + | None -> 271 + (* Pop from stack and restore ancestor mask *) 272 + begin match state.stack with 273 + | [] -> () (* Should not happen in well-formed documents *) 274 + | node :: rest -> 275 + state.stack <- rest; 276 + state.ancestor_mask <- node.ancestor_mask 277 + end 278 + 279 + let characters _state _text _collector = 280 + () (* No text-specific nesting checks *) 281 + 282 + let end_document _state _collector = 283 + () (* No document-level checks needed *) 284 + 285 + (** Create the checker as a first-class module. *) 286 + let checker = 287 + let module M = struct 288 + type nonrec state = state 289 + let create = create 290 + let reset = reset 291 + let start_element = start_element 292 + let end_element = end_element 293 + let characters = characters 294 + let end_document = end_document 295 + end in 296 + (module M : Checker.S)
+79
lib/html5_checker/semantic/nesting_checker.mli
··· 1 + (** Interactive element nesting checker. 2 + 3 + Validates that interactive elements are not nested in ways that violate 4 + HTML5 specifications. This checker tracks ancestor elements and ensures 5 + that prohibited nesting patterns are detected and reported. 6 + 7 + {2 Validation Rules} 8 + 9 + The checker enforces the following prohibited nesting relationships: 10 + 11 + {3 Self-nesting Restrictions} 12 + 13 + These elements cannot be nested inside themselves: 14 + - [form] cannot contain [form] 15 + - [progress] cannot contain [progress] 16 + - [meter] cannot contain [meter] 17 + - [dfn] cannot contain [dfn] 18 + - [noscript] cannot contain [noscript] 19 + - [label] cannot contain [label] 20 + 21 + {3 Structural Element Restrictions} 22 + 23 + - [header] cannot be inside [header], [footer], or [address] 24 + - [footer] cannot be inside [header], [footer], or [address] 25 + - [address] cannot contain [header], [footer], [article], [section], 26 + [nav], or heading elements ([h1]-[h6]) 27 + 28 + {3 Interactive Content Restrictions} 29 + 30 + Interactive elements cannot be descendants of [a] (with [href]) or 31 + [button]: 32 + 33 + - [a] (when it has [href]) cannot be inside [a] or [button] 34 + - [button] cannot be inside [a] or [button] 35 + - [details] cannot be inside [a] or [button] 36 + - [embed] cannot be inside [a] or [button] 37 + - [iframe] cannot be inside [a] or [button] 38 + - [label] cannot be inside [a] or [button] 39 + - [select] cannot be inside [a] or [button] 40 + - [textarea] cannot be inside [a] or [button] 41 + - [audio] (with [controls]) cannot be inside [a] or [button] 42 + - [video] (with [controls]) cannot be inside [a] or [button] 43 + - [input] (except [type=hidden]) cannot be inside [a] or [button] 44 + - [img] (with [usemap]) cannot be inside [a] or [button] 45 + - [object] (with [usemap]) cannot be inside [a] or [button] 46 + 47 + {3 Table Cell Restrictions} 48 + 49 + - [dt] and [th] cannot contain [header], [footer], [article], [section], 50 + [nav], heading elements ([h1]-[h6]), or [hgroup] 51 + 52 + {3 Other Restrictions} 53 + 54 + - [caption] cannot contain [table] 55 + - [area] must have a [map] ancestor 56 + 57 + {2 Implementation Details} 58 + 59 + The checker uses a bitmask-based approach to efficiently track ancestor 60 + elements. Each "special" ancestor element has a corresponding bit in the 61 + ancestor mask. As elements are opened and closed during traversal, the 62 + mask is updated to reflect the current ancestor context. 63 + 64 + When an element is encountered, the checker: 65 + 1. Computes which ancestors would be prohibited for this element 66 + 2. Checks if any of those prohibited ancestors are present in the 67 + current ancestor mask 68 + 3. Reports errors for any violations found 69 + 4. Updates the ancestor mask to include the current element (if it's 70 + a special ancestor) 71 + 72 + @see <https://html.spec.whatwg.org/multipage/dom.html#content-models> 73 + HTML5 specification: Content models 74 + *) 75 + 76 + include Checker.S 77 + 78 + val checker : Checker.t 79 + (** [checker] is a checker instance for validating element nesting rules. *)
+339
lib/html5_checker/semantic/obsolete_checker.ml
··· 1 + (** Obsolete elements map: element name -> suggestion message *) 2 + let obsolete_elements = 3 + let tbl = Hashtbl.create 32 in 4 + Hashtbl.add tbl "applet" "Use \"embed\" or \"object\" element instead."; 5 + Hashtbl.add tbl "acronym" "Use the \"abbr\" element instead."; 6 + Hashtbl.add tbl "bgsound" "Use the \"audio\" element instead."; 7 + Hashtbl.add tbl "dir" "Use the \"ul\" element instead."; 8 + Hashtbl.add tbl "frame" "Use the \"iframe\" element and CSS instead, or use server-side includes."; 9 + Hashtbl.add tbl "frameset" "Use the \"iframe\" element and CSS instead, or use server-side includes."; 10 + Hashtbl.add tbl "noframes" "Use the \"iframe\" element and CSS instead, or use server-side includes."; 11 + Hashtbl.add tbl "isindex" "Use the \"form\" element containing \"input\" element of type \"text\" instead."; 12 + Hashtbl.add tbl "keygen" ""; 13 + Hashtbl.add tbl "listing" "Use \"pre\" or \"code\" element instead."; 14 + Hashtbl.add tbl "menuitem" "Use script to handle \"contextmenu\" event instead."; 15 + Hashtbl.add tbl "nextid" "Use GUIDs instead."; 16 + Hashtbl.add tbl "noembed" "Use the \"object\" element instead."; 17 + Hashtbl.add tbl "param" "Use the \"data\" attribute of the \"object\" element to set the URL of the external resource."; 18 + Hashtbl.add tbl "plaintext" "Use the \"text/plain\" MIME type instead."; 19 + Hashtbl.add tbl "rb" ""; 20 + Hashtbl.add tbl "rtc" ""; 21 + Hashtbl.add tbl "strike" "Use \"del\" or \"s\" element instead."; 22 + Hashtbl.add tbl "xmp" "Use \"pre\" or \"code\" element instead."; 23 + Hashtbl.add tbl "basefont" "Use CSS instead."; 24 + Hashtbl.add tbl "big" "Use CSS instead."; 25 + Hashtbl.add tbl "blink" "Use CSS instead."; 26 + Hashtbl.add tbl "center" "Use CSS instead."; 27 + Hashtbl.add tbl "font" "Use CSS instead."; 28 + Hashtbl.add tbl "marquee" "Use CSS instead."; 29 + Hashtbl.add tbl "multicol" "Use CSS instead."; 30 + Hashtbl.add tbl "nobr" "Use CSS instead."; 31 + Hashtbl.add tbl "spacer" "Use CSS instead."; 32 + Hashtbl.add tbl "tt" "Use CSS instead."; 33 + tbl 34 + 35 + (** Obsolete attributes map: attr_name -> (element_name -> suggestion message) *) 36 + let obsolete_attributes = 37 + let tbl = Hashtbl.create 64 in 38 + 39 + (* Helper to register an attribute for multiple elements *) 40 + let register attr_name elements suggestion = 41 + let element_map = 42 + match Hashtbl.find_opt tbl attr_name with 43 + | Some m -> m 44 + | None -> 45 + let m = Hashtbl.create 16 in 46 + Hashtbl.add tbl attr_name m; 47 + m 48 + in 49 + List.iter (fun elem -> Hashtbl.add element_map elem suggestion) elements 50 + in 51 + 52 + register "abbr" ["td"] 53 + "Consider instead beginning the cell contents with concise text, followed by further elaboration if needed."; 54 + 55 + register "accept" ["form"] 56 + "Use the \"accept\" attribute directly on the \"input\" elements instead."; 57 + 58 + register "archive" ["object"] 59 + "Use the \"data\" and \"type\" attributes to invoke plugins."; 60 + 61 + register "a" ["object"] 62 + "Use the \"data\" and \"type\" attributes to invoke plugins."; 63 + 64 + register "axis" ["td"; "th"] 65 + "Use the \"scope\" attribute."; 66 + 67 + register "border" ["input"; "img"; "object"; "table"] 68 + "Consider specifying \"img { border: 0; }\" in CSS instead."; 69 + 70 + register "charset" ["a"; "link"] 71 + "Use an HTTP Content-Type header on the linked resource instead."; 72 + 73 + register "classid" ["object"] 74 + "Use the \"data\" and \"type\" attributes to invoke plugins."; 75 + 76 + register "code" ["object"] 77 + "Use the \"data\" and \"type\" attributes to invoke plugins."; 78 + 79 + register "codebase" ["object"] 80 + "Use the \"data\" and \"type\" attributes to invoke plugins."; 81 + 82 + register "codetype" ["object"] 83 + "Use the \"data\" and \"type\" attributes to invoke plugins."; 84 + 85 + register "coords" ["a"] 86 + "Use \"area\" instead of \"a\" for image maps."; 87 + 88 + register "datafld" ["a"; "button"; "div"; "fieldset"; "iframe"; "img"; "input"; "label"; "legend"; "object"; "select"; "span"; "textarea"] 89 + "Use script and a mechanism such as XMLHttpRequest to populate the page dynamically"; 90 + 91 + register "dataformatas" ["button"; "div"; "input"; "label"; "legend"; "object"; "option"; "select"; "span"; "table"] 92 + "Use script and a mechanism such as XMLHttpRequest to populate the page dynamically"; 93 + 94 + register "datapagesize" ["table"] 95 + "You can safely omit it."; 96 + 97 + register "datasrc" ["a"; "button"; "div"; "iframe"; "img"; "input"; "label"; "legend"; "object"; "option"; "select"; "span"; "table"; "textarea"] 98 + "Use script and a mechanism such as XMLHttpRequest to populate the page dynamically"; 99 + 100 + register "declare" ["object"] 101 + "Repeat the \"object\" element completely each time the resource is to be reused."; 102 + 103 + register "event" ["script"] 104 + "Use DOM Events mechanisms to register event listeners."; 105 + 106 + register "for" ["script"] 107 + "Use DOM Events mechanisms to register event listeners."; 108 + 109 + register "hreflang" ["area"] 110 + "You can safely omit it."; 111 + 112 + register "ismap" ["input"] 113 + "You can safely omit it."; 114 + 115 + register "label" ["menu"] 116 + "Use script to handle \"contextmenu\" event instead."; 117 + 118 + register "language" ["script"] 119 + "Use the \"type\" attribute instead."; 120 + 121 + register "longdesc" ["iframe"; "img"] 122 + "Use a regular \"a\" element to link to the description."; 123 + 124 + register "lowsrc" ["img"] 125 + "Use a progressive JPEG image instead."; 126 + 127 + register "manifest" ["html"] 128 + "Use service workers instead."; 129 + 130 + register "methods" ["a"; "link"] 131 + "Use the HTTP OPTIONS feature instead."; 132 + 133 + register "name" ["a"; "embed"; "img"; "option"] 134 + "Use the \"id\" attribute instead."; 135 + 136 + register "nohref" ["area"] 137 + "Omitting the \"href\" attribute is sufficient."; 138 + 139 + register "profile" ["head"] 140 + "To declare which \"meta\" terms are used in the document, instead register the names as meta extensions. To trigger specific UA behaviors, use a \"link\" element instead."; 141 + 142 + register "scheme" ["meta"] 143 + "Use only one scheme per field, or make the scheme declaration part of the value."; 144 + 145 + register "scope" ["td"] 146 + "Use the \"scope\" attribute on a \"th\" element instead."; 147 + 148 + register "shape" ["a"] 149 + "Use \"area\" instead of \"a\" for image maps."; 150 + 151 + register "standby" ["object"] 152 + "Optimise the linked resource so that it loads quickly or, at least, incrementally."; 153 + 154 + register "summary" ["table"] 155 + "Consider describing the structure of the \"table\" in a \"caption\" element or in a \"figure\" element containing the \"table\"; or, simplify the structure of the \"table\" so that no description is needed."; 156 + 157 + register "target" ["link"] 158 + "You can safely omit it."; 159 + 160 + register "type" ["param"; "area"; "menu"] 161 + "You can safely omit it."; 162 + 163 + register "typemustmatch" ["object"] 164 + "Avoid using \"object\" elements with untrusted resources."; 165 + 166 + register "urn" ["a"; "link"] 167 + "Specify the preferred persistent identifier using the \"href\" attribute instead."; 168 + 169 + register "usemap" ["input"; "object"] 170 + "Use the \"img\" element instead."; 171 + 172 + register "valuetype" ["param"] 173 + "Use the \"name\" and \"value\" attributes without declaring value types."; 174 + 175 + register "version" ["html"] 176 + "You can safely omit it."; 177 + 178 + tbl 179 + 180 + (** Obsolete style attributes map: attr_name -> element_name list *) 181 + let obsolete_style_attrs = 182 + let tbl = Hashtbl.create 64 in 183 + 184 + let register attr_name elements = 185 + Hashtbl.add tbl attr_name elements 186 + in 187 + 188 + register "align" ["caption"; "iframe"; "img"; "input"; "object"; "embed"; "legend"; "table"; "hr"; "div"; "h1"; "h2"; "h3"; "h4"; "h5"; "h6"; "p"; "col"; "colgroup"; "tbody"; "td"; "tfoot"; "th"; "thead"; "tr"]; 189 + register "alink" ["body"]; 190 + register "allowtransparency" ["iframe"]; 191 + register "background" ["body"; "table"; "thead"; "tbody"; "tfoot"; "tr"; "td"; "th"]; 192 + register "bgcolor" ["table"; "tr"; "td"; "th"; "body"]; 193 + register "bordercolor" ["table"]; 194 + register "cellpadding" ["table"]; 195 + register "cellspacing" ["table"]; 196 + register "char" ["col"; "colgroup"; "tbody"; "td"; "tfoot"; "th"; "thead"; "tr"]; 197 + register "charoff" ["col"; "colgroup"; "tbody"; "td"; "tfoot"; "th"; "thead"; "tr"]; 198 + register "clear" ["br"]; 199 + register "color" ["hr"]; 200 + register "compact" ["dl"; "menu"; "ol"; "ul"]; 201 + register "frameborder" ["iframe"]; 202 + register "framespacing" ["iframe"]; 203 + register "frame" ["table"]; 204 + register "height" ["table"; "thead"; "tbody"; "tfoot"; "tr"; "td"; "th"]; 205 + register "hspace" ["embed"; "iframe"; "input"; "img"; "object"]; 206 + register "link" ["body"]; 207 + register "bottommargin" ["body"]; 208 + register "marginheight" ["iframe"; "body"]; 209 + register "leftmargin" ["body"]; 210 + register "rightmargin" ["body"]; 211 + register "topmargin" ["body"]; 212 + register "marginwidth" ["iframe"; "body"]; 213 + register "noshade" ["hr"]; 214 + register "nowrap" ["td"; "th"]; 215 + register "rules" ["table"]; 216 + register "scrolling" ["iframe"]; 217 + register "size" ["hr"]; 218 + register "text" ["body"]; 219 + register "type" ["li"; "ul"]; 220 + register "valign" ["col"; "colgroup"; "tbody"; "td"; "tfoot"; "th"; "thead"; "tr"]; 221 + register "vlink" ["body"]; 222 + register "vspace" ["embed"; "iframe"; "input"; "img"; "object"]; 223 + register "width" ["hr"; "table"; "td"; "th"; "col"; "colgroup"; "pre"]; 224 + 225 + tbl 226 + 227 + (** Obsolete global attributes map: attr_name -> suggestion message *) 228 + let obsolete_global_attrs = 229 + let tbl = Hashtbl.create 8 in 230 + Hashtbl.add tbl "contextmenu" "Use script to handle \"contextmenu\" event instead."; 231 + Hashtbl.add tbl "dropzone" "Use script to handle the \"dragenter\" and \"dragover\" events instead."; 232 + Hashtbl.add tbl "onshow" "Use script to handle \"contextmenu\" event instead."; 233 + tbl 234 + 235 + (** Checker state *) 236 + type state = unit 237 + 238 + let create () = () 239 + 240 + let reset _state = () 241 + 242 + let start_element _state ~name ~namespace ~attrs collector = 243 + (* Only check HTML elements (no namespace or explicit HTML namespace) *) 244 + let is_html = match namespace with 245 + | None -> true 246 + | Some ns -> String.equal (String.lowercase_ascii ns) "html" 247 + in 248 + 249 + if not is_html then () 250 + else begin 251 + let name_lower = String.lowercase_ascii name in 252 + 253 + (* Check for obsolete element *) 254 + (match Hashtbl.find_opt obsolete_elements name_lower with 255 + | None -> () 256 + | Some suggestion -> 257 + let message = 258 + if String.length suggestion = 0 then 259 + Printf.sprintf "The \"%s\" element is obsolete." name 260 + else 261 + Printf.sprintf "The \"%s\" element is obsolete. %s" name suggestion 262 + in 263 + Message_collector.add_error collector 264 + ~message 265 + ~code:"obsolete-element" 266 + ~element:name 267 + ()); 268 + 269 + (* Check for obsolete attributes *) 270 + List.iter (fun (attr_name, _attr_value) -> 271 + let attr_lower = String.lowercase_ascii attr_name in 272 + 273 + (* Check specific obsolete attributes for this element *) 274 + (match Hashtbl.find_opt obsolete_attributes attr_lower with 275 + | None -> () 276 + | Some element_map -> 277 + (match Hashtbl.find_opt element_map name_lower with 278 + | None -> () 279 + | Some suggestion -> 280 + let message = 281 + Printf.sprintf "The \"%s\" attribute on the \"%s\" element is obsolete. %s" 282 + attr_name name suggestion 283 + in 284 + Message_collector.add_error collector 285 + ~message 286 + ~code:"obsolete-attribute" 287 + ~element:name 288 + ~attribute:attr_name 289 + ())); 290 + 291 + (* Check obsolete style attributes *) 292 + (match Hashtbl.find_opt obsolete_style_attrs attr_lower with 293 + | None -> () 294 + | Some elements -> 295 + if List.mem name_lower elements then 296 + let message = 297 + Printf.sprintf "The \"%s\" attribute on the \"%s\" element is obsolete. Use CSS instead." 298 + attr_name name 299 + in 300 + Message_collector.add_error collector 301 + ~message 302 + ~code:"obsolete-style-attribute" 303 + ~element:name 304 + ~attribute:attr_name 305 + ()); 306 + 307 + (* Check obsolete global attributes *) 308 + (match Hashtbl.find_opt obsolete_global_attrs attr_lower with 309 + | None -> () 310 + | Some suggestion -> 311 + let message = 312 + Printf.sprintf "The \"%s\" attribute is obsolete. %s" attr_name suggestion 313 + in 314 + Message_collector.add_error collector 315 + ~message 316 + ~code:"obsolete-global-attribute" 317 + ~element:name 318 + ~attribute:attr_name 319 + ()) 320 + ) attrs 321 + end 322 + 323 + let end_element _state ~name:_ ~namespace:_ _collector = () 324 + 325 + let characters _state _text _collector = () 326 + 327 + let end_document _state _collector = () 328 + 329 + let checker = 330 + let module M = struct 331 + type nonrec state = state 332 + let create = create 333 + let reset = reset 334 + let start_element = start_element 335 + let end_element = end_element 336 + let characters = characters 337 + let end_document = end_document 338 + end in 339 + (module M : Checker.S)
+67
lib/html5_checker/semantic/obsolete_checker.mli
··· 1 + (** Obsolete element and attribute checker. 2 + 3 + Reports errors for obsolete HTML elements and attributes that should 4 + not be used in modern HTML5 documents. 5 + 6 + This checker validates that documents do not use deprecated elements 7 + or attributes from earlier HTML versions. It reports: 8 + 9 + - {b Obsolete elements}: Elements like [<applet>], [<font>], [<center>] 10 + that have been removed from HTML5 11 + - {b Obsolete attributes}: Attributes like [align], [bgcolor], [border] 12 + that should be replaced with CSS 13 + - {b Obsolete global attributes}: Global attributes like [contextmenu] 14 + that are no longer supported 15 + 16 + {2 Obsolete Elements} 17 + 18 + Elements that are flagged as obsolete include: 19 + - Presentational elements: [<basefont>], [<big>], [<center>], [<font>], 20 + [<strike>], [<tt>] 21 + - Frame elements: [<frame>], [<frameset>], [<noframes>] 22 + - Deprecated interactive elements: [<applet>], [<bgsound>], [<keygen>] 23 + - Deprecated text elements: [<acronym>], [<dir>], [<listing>], [<xmp>] 24 + - And many others 25 + 26 + {2 Obsolete Attributes} 27 + 28 + The checker validates against hundreds of obsolete attributes, including: 29 + - Presentational attributes: [align], [bgcolor], [border], [color], 30 + [height], [width] (on certain elements) 31 + - Data binding attributes: [datafld], [dataformatas], [datasrc] 32 + - Navigation attributes: [longdesc], [methods], [urn] 33 + - And many element-specific obsolete attributes 34 + 35 + {2 Example} 36 + 37 + {[ 38 + let checker = Obsolete_checker.checker in 39 + let module C = (val checker : Checker.S) in 40 + let state = C.create () in 41 + 42 + (* This will emit an error *) 43 + C.start_element state ~name:"center" ~namespace:None ~attrs:[] collector; 44 + (* Error: Element "center" is obsolete. Use CSS instead. *) 45 + 46 + (* This will also emit an error *) 47 + C.start_element state ~name:"div" 48 + ~namespace:None 49 + ~attrs:[("align", "center")] 50 + collector; 51 + (* Error: Attribute "align" on element "div" is obsolete. Use CSS instead. *) 52 + ]} 53 + *) 54 + 55 + (** Include the standard checker signature. *) 56 + include Checker.S 57 + 58 + (** {1 Checker Instance} *) 59 + 60 + val checker : Checker.t 61 + (** [checker] is a pre-configured obsolete checker instance that can be 62 + registered with the checker registry. 63 + 64 + {b Example:} 65 + {[ 66 + Checker_registry.register registry "obsolete" Obsolete_checker.checker 67 + ]} *)
+143
lib/html5_checker/semantic/required_attr_checker.ml
··· 1 + (** Required attribute checker implementation. *) 2 + 3 + type state = { 4 + mutable _in_figure : bool; 5 + (** Track if we're inside a <figure> element (alt is more critical there) *) 6 + } 7 + 8 + let create () = { _in_figure = false } 9 + 10 + let reset state = state._in_figure <- false 11 + 12 + (** Check if an attribute list contains a specific attribute. *) 13 + let has_attr name attrs = 14 + List.exists (fun (attr_name, _) -> String.equal attr_name name) attrs 15 + 16 + (** Get the value of an attribute if present. *) 17 + let get_attr name attrs = 18 + List.find_map 19 + (fun (attr_name, value) -> 20 + if String.equal attr_name name then Some value else None) 21 + attrs 22 + 23 + let check_img_element attrs collector = 24 + (* Check for required src attribute *) 25 + if not (has_attr "src" attrs) then 26 + Message_collector.add_error collector ~message:"img element requires src attribute" 27 + ~code:"missing-required-attribute" ~element:"img" ~attribute:"src" (); 28 + 29 + (* Check for alt attribute - always required *) 30 + if not (has_attr "alt" attrs) then 31 + Message_collector.add_error collector 32 + ~message:"img element requires alt attribute for accessibility" 33 + ~code:"missing-required-attribute" ~element:"img" ~attribute:"alt" () 34 + 35 + let check_area_element attrs collector = 36 + (* area with href requires alt *) 37 + if has_attr "href" attrs && not (has_attr "alt" attrs) then 38 + Message_collector.add_error collector 39 + ~message:"area element with href requires alt attribute" ~code:"missing-required-attribute" 40 + ~element:"area" ~attribute:"alt" () 41 + 42 + let check_input_element attrs collector = 43 + match get_attr "type" attrs with 44 + | Some "image" -> 45 + (* input[type=image] requires alt *) 46 + if not (has_attr "alt" attrs) then 47 + Message_collector.add_error collector 48 + ~message:"input element with type=\"image\" requires alt attribute" 49 + ~code:"missing-required-attribute" ~element:"input" ~attribute:"alt" () 50 + | Some "hidden" -> 51 + (* input[type=hidden] should not have required attribute *) 52 + if has_attr "required" attrs then 53 + Message_collector.add_error collector 54 + ~message:"input element with type=\"hidden\" cannot have required attribute" 55 + ~code:"invalid-attribute-combination" ~element:"input" ~attribute:"required" () 56 + | Some "file" -> 57 + (* input[type=file] should not have value attribute *) 58 + if has_attr "value" attrs then 59 + Message_collector.add_warning collector 60 + ~message:"input element with type=\"file\" should not have value attribute" 61 + ~code:"invalid-attribute-combination" ~element:"input" ~attribute:"value" () 62 + | _ -> () 63 + 64 + let check_script_element attrs _collector = 65 + (* script requires src OR text content *) 66 + if not (has_attr "src" attrs) then 67 + (* We can't check for text content here; that would need to be done 68 + in end_element or with state tracking *) 69 + () 70 + 71 + let check_meta_element attrs collector = 72 + (* meta requires charset OR (name AND content) OR (http-equiv AND content) *) 73 + let has_charset = has_attr "charset" attrs in 74 + let has_name = has_attr "name" attrs in 75 + let has_content = has_attr "content" attrs in 76 + let has_http_equiv = has_attr "http-equiv" attrs in 77 + 78 + let valid = 79 + has_charset 80 + || (has_name && has_content) 81 + || (has_http_equiv && has_content) 82 + in 83 + 84 + if not valid then 85 + Message_collector.add_error collector 86 + ~message: 87 + "meta element requires either charset, or name+content, or http-equiv+content" 88 + ~code:"missing-required-attribute" ~element:"meta" () 89 + 90 + let check_link_element attrs collector = 91 + (* link[rel="stylesheet"] requires href *) 92 + match get_attr "rel" attrs with 93 + | Some rel when String.equal rel "stylesheet" -> 94 + if not (has_attr "href" attrs) then 95 + Message_collector.add_error collector 96 + ~message:"link element with rel=\"stylesheet\" requires href attribute" 97 + ~code:"missing-required-attribute" ~element:"link" ~attribute:"href" () 98 + | _ -> () 99 + 100 + let check_a_element attrs collector = 101 + (* a[download] requires href *) 102 + if has_attr "download" attrs && not (has_attr "href" attrs) then 103 + Message_collector.add_error collector 104 + ~message:"a element with download attribute requires href attribute" 105 + ~code:"missing-required-attribute" ~element:"a" ~attribute:"href" () 106 + 107 + let check_map_element attrs collector = 108 + (* map requires name *) 109 + if not (has_attr "name" attrs) then 110 + Message_collector.add_error collector 111 + ~message:"map element requires name attribute" ~code:"missing-required-attribute" 112 + ~element:"map" ~attribute:"name" () 113 + 114 + let start_element state ~name ~namespace:_ ~attrs collector = 115 + match name with 116 + | "img" -> check_img_element attrs collector 117 + | "area" -> check_area_element attrs collector 118 + | "input" -> check_input_element attrs collector 119 + | "script" -> check_script_element attrs collector 120 + | "meta" -> check_meta_element attrs collector 121 + | "link" -> check_link_element attrs collector 122 + | "a" -> check_a_element attrs collector 123 + | "map" -> check_map_element attrs collector 124 + | "figure" -> state._in_figure <- true 125 + | _ -> () 126 + 127 + let end_element state ~name ~namespace:_ _collector = 128 + match name with "figure" -> state._in_figure <- false | _ -> () 129 + 130 + let characters _state _text _collector = () 131 + 132 + let end_document _state _collector = () 133 + 134 + let checker = (module struct 135 + type nonrec state = state 136 + 137 + let create = create 138 + let reset = reset 139 + let start_element = start_element 140 + let end_element = end_element 141 + let characters = characters 142 + let end_document = end_document 143 + end : Checker.S)
+50
lib/html5_checker/semantic/required_attr_checker.mli
··· 1 + (** Required attribute checker. 2 + 3 + Validates that elements have their required attributes according to the 4 + HTML5 specification. This checker ensures that: 5 + 6 + - Elements have all mandatory attributes 7 + - Conditional attributes are present when required by context 8 + - Attributes that must appear together are all present 9 + 10 + {2 Validation Rules} 11 + 12 + The checker validates these common required attributes: 13 + 14 + {b Images and Media} 15 + - [img] requires [src] attribute 16 + - [img] requires [alt] attribute (error in most contexts, warning otherwise) 17 + - [area] with [href] requires [alt] attribute 18 + - [input\[type=image\]] requires [alt] attribute 19 + 20 + {b Forms} 21 + - [input] defaults to [type="text"] if [type] is omitted 22 + - [map] requires [name] attribute 23 + 24 + {b Scripts and Styles} 25 + - [script] requires either [src] attribute OR text content 26 + - [style] with [scoped] requires appropriate positioning 27 + 28 + {b Metadata} 29 + - [meta] requires one of: 30 + - [charset] attribute, OR 31 + - [name] and [content] attributes, OR 32 + - [http-equiv] and [content] attributes 33 + - [link\[rel="stylesheet"\]] requires [href] attribute 34 + 35 + {b Links} 36 + - [a] with [download] attribute requires [href] attribute 37 + 38 + @see <https://html.spec.whatwg.org/multipage/indices.html#attributes-3> 39 + WHATWG HTML: Attributes *) 40 + 41 + include Checker.S 42 + 43 + val checker : Checker.t 44 + (** A first-class module instance of this checker. 45 + 46 + {b Usage:} 47 + {[ 48 + let checker = Required_attr_checker.checker in 49 + Checker_registry.register "required-attributes" checker 50 + ]} *)
+404
lib/html5_checker/specialized/aria_checker.ml
··· 1 + (** ARIA validation checker implementation. *) 2 + 3 + (** Valid WAI-ARIA 1.2 roles. 4 + 5 + These are all the valid role values according to the WAI-ARIA 1.2 6 + specification. Abstract roles are included but should not be used 7 + in HTML content. *) 8 + let valid_aria_roles = 9 + let roles = [ 10 + (* Document structure roles *) 11 + "article"; "associationlist"; "associationlistitemkey"; 12 + "associationlistitemvalue"; "blockquote"; "caption"; "cell"; "code"; 13 + "definition"; "deletion"; "directory"; "document"; "emphasis"; "feed"; 14 + "figure"; "generic"; "group"; "heading"; "img"; "insertion"; "list"; 15 + "listitem"; "mark"; "math"; "meter"; "none"; "note"; "paragraph"; 16 + "presentation"; "row"; "rowgroup"; "strong"; "subscript"; "suggestion"; 17 + "superscript"; "table"; "term"; "time"; "toolbar"; "tooltip"; 18 + 19 + (* Widget roles *) 20 + "button"; "checkbox"; "combobox"; "dialog"; "grid"; "gridcell"; "link"; 21 + "listbox"; "menu"; "menubar"; "menuitem"; "menuitemcheckbox"; 22 + "menuitemradio"; "option"; "progressbar"; "radio"; "radiogroup"; 23 + "scrollbar"; "searchbox"; "separator"; "slider"; "spinbutton"; "switch"; 24 + "tab"; "tablist"; "tabpanel"; "textbox"; "tree"; "treegrid"; "treeitem"; 25 + 26 + (* Landmark roles *) 27 + "banner"; "complementary"; "contentinfo"; "form"; "main"; "navigation"; 28 + "region"; "search"; 29 + 30 + (* Live region roles *) 31 + "alert"; "log"; "marquee"; "status"; "timer"; 32 + 33 + (* Window roles *) 34 + "alertdialog"; 35 + 36 + (* Abstract roles - not for use in HTML content *) 37 + "command"; "comment"; "composite"; "input"; "landmark"; "range"; 38 + "roletype"; "section"; "sectionhead"; "select"; "structure"; "widget"; 39 + "window"; 40 + 41 + (* Additional roles *) 42 + "application"; "columnheader"; "rowheader"; 43 + ] in 44 + let tbl = Hashtbl.create (List.length roles) in 45 + List.iter (fun role -> Hashtbl.add tbl role ()) roles; 46 + tbl 47 + 48 + (** Roles that cannot have accessible names. 49 + 50 + These roles must not have aria-label or aria-labelledby attributes. *) 51 + let roles_which_cannot_be_named = 52 + let roles = [ 53 + "caption"; "code"; "deletion"; "emphasis"; "generic"; "insertion"; 54 + "paragraph"; "presentation"; "strong"; "subscript"; "superscript" 55 + ] in 56 + let tbl = Hashtbl.create (List.length roles) in 57 + List.iter (fun role -> Hashtbl.add tbl role ()) roles; 58 + tbl 59 + 60 + (** Map from descendant role to set of required ancestor roles. *) 61 + let required_role_ancestor_by_descendant : (string, string list) Hashtbl.t = 62 + Hashtbl.create 32 63 + 64 + (** Register that a descendant role requires one of the given ancestor roles. *) 65 + let register_required_ancestor_role parents child = 66 + Hashtbl.add required_role_ancestor_by_descendant child parents 67 + 68 + (** Initialize required ancestor role relationships. *) 69 + let () = 70 + register_required_ancestor_role ["listbox"] "option"; 71 + register_required_ancestor_role ["menu"; "menubar"] "menuitem"; 72 + register_required_ancestor_role ["menu"; "menubar"] "menuitemcheckbox"; 73 + register_required_ancestor_role ["menu"; "menubar"] "menuitemradio"; 74 + register_required_ancestor_role ["tablist"] "tab"; 75 + register_required_ancestor_role ["tree"; "group"] "treeitem"; 76 + register_required_ancestor_role ["list"; "group"] "listitem"; 77 + register_required_ancestor_role ["row"] "cell"; 78 + register_required_ancestor_role ["row"] "gridcell"; 79 + register_required_ancestor_role ["row"] "columnheader"; 80 + register_required_ancestor_role ["row"] "rowheader"; 81 + register_required_ancestor_role ["grid"; "rowgroup"; "table"; "treegrid"] "row"; 82 + register_required_ancestor_role ["grid"; "table"; "treegrid"] "rowgroup" 83 + 84 + (** Map from HTML element name to implicit ARIA role. *) 85 + let elements_with_implicit_role : (string, string) Hashtbl.t = 86 + let tbl = Hashtbl.create 64 in 87 + Hashtbl.add tbl "article" "article"; 88 + Hashtbl.add tbl "aside" "complementary"; 89 + Hashtbl.add tbl "body" "document"; 90 + Hashtbl.add tbl "button" "button"; 91 + Hashtbl.add tbl "datalist" "listbox"; 92 + Hashtbl.add tbl "dd" "definition"; 93 + Hashtbl.add tbl "details" "group"; 94 + Hashtbl.add tbl "dialog" "dialog"; 95 + Hashtbl.add tbl "dfn" "term"; 96 + Hashtbl.add tbl "dt" "term"; 97 + Hashtbl.add tbl "fieldset" "group"; 98 + Hashtbl.add tbl "figure" "figure"; 99 + Hashtbl.add tbl "form" "form"; 100 + Hashtbl.add tbl "footer" "contentinfo"; 101 + Hashtbl.add tbl "h1" "heading"; 102 + Hashtbl.add tbl "h2" "heading"; 103 + Hashtbl.add tbl "h3" "heading"; 104 + Hashtbl.add tbl "h4" "heading"; 105 + Hashtbl.add tbl "h5" "heading"; 106 + Hashtbl.add tbl "h6" "heading"; 107 + Hashtbl.add tbl "hr" "separator"; 108 + Hashtbl.add tbl "header" "banner"; 109 + Hashtbl.add tbl "img" "img"; 110 + Hashtbl.add tbl "li" "listitem"; 111 + Hashtbl.add tbl "link" "link"; 112 + Hashtbl.add tbl "main" "main"; 113 + Hashtbl.add tbl "nav" "navigation"; 114 + Hashtbl.add tbl "ol" "list"; 115 + Hashtbl.add tbl "output" "status"; 116 + Hashtbl.add tbl "progress" "progressbar"; 117 + Hashtbl.add tbl "section" "region"; 118 + Hashtbl.add tbl "summary" "button"; 119 + Hashtbl.add tbl "s" "deletion"; 120 + Hashtbl.add tbl "table" "table"; 121 + Hashtbl.add tbl "tbody" "rowgroup"; 122 + Hashtbl.add tbl "textarea" "textbox"; 123 + Hashtbl.add tbl "tfoot" "rowgroup"; 124 + Hashtbl.add tbl "thead" "rowgroup"; 125 + Hashtbl.add tbl "td" "cell"; 126 + Hashtbl.add tbl "tr" "row"; 127 + Hashtbl.add tbl "ul" "list"; 128 + tbl 129 + 130 + (** Map from HTML element name to multiple possible implicit roles. 131 + 132 + Some elements like 'th' can have different implicit roles depending 133 + on context (columnheader or rowheader). *) 134 + let elements_with_implicit_roles : (string, string array) Hashtbl.t = 135 + let tbl = Hashtbl.create 4 in 136 + Hashtbl.add tbl "th" [| "columnheader"; "rowheader" |]; 137 + tbl 138 + 139 + (** Map from input type to implicit ARIA role. *) 140 + let input_types_with_implicit_role : (string, string) Hashtbl.t = 141 + let tbl = Hashtbl.create 8 in 142 + Hashtbl.add tbl "button" "button"; 143 + Hashtbl.add tbl "checkbox" "checkbox"; 144 + Hashtbl.add tbl "image" "button"; 145 + Hashtbl.add tbl "number" "spinbutton"; 146 + Hashtbl.add tbl "radio" "radio"; 147 + Hashtbl.add tbl "range" "slider"; 148 + Hashtbl.add tbl "reset" "button"; 149 + Hashtbl.add tbl "submit" "button"; 150 + tbl 151 + 152 + (** Map from ARIA attribute to array of roles for which it is deprecated. 153 + 154 + These attributes should not be used on elements with these roles. *) 155 + let aria_deprecated_attributes_by_role : (string, string array) Hashtbl.t = 156 + let tbl = Hashtbl.create 8 in 157 + 158 + (* aria-disabled deprecated for many roles *) 159 + Hashtbl.add tbl "aria-disabled" [| 160 + "alert"; "alertdialog"; "article"; "associationlist"; 161 + "associationlistitemkey"; "associationlistitemvalue"; "banner"; 162 + "blockquote"; "caption"; "cell"; "code"; "command"; "comment"; 163 + "complementary"; "contentinfo"; "definition"; "deletion"; 164 + "dialog"; "directory"; "document"; "emphasis"; "feed"; "figure"; 165 + "form"; "generic"; "heading"; "img"; "insertion"; "landmark"; 166 + "list"; "listitem"; "log"; "main"; "mark"; "marquee"; "math"; 167 + "meter"; "navigation"; "note"; "paragraph"; "presentation"; 168 + "progressbar"; "range"; "region"; "rowgroup"; "search"; 169 + "section"; "sectionhead"; "status"; "strong"; "structure"; 170 + "subscript"; "suggestion"; "superscript"; "table"; "tabpanel"; 171 + "term"; "time"; "timer"; "tooltip"; "widget"; "window" 172 + |]; 173 + 174 + (* aria-errormessage deprecated for many roles *) 175 + Hashtbl.add tbl "aria-errormessage" [| 176 + "alert"; "alertdialog"; "article"; "associationlist"; 177 + "associationlistitemkey"; "associationlistitemvalue"; "banner"; 178 + "blockquote"; "button"; "caption"; "cell"; "code"; "command"; 179 + "comment"; "complementary"; "composite"; "contentinfo"; 180 + "definition"; "deletion"; "dialog"; "directory"; "document"; 181 + "emphasis"; "feed"; "figure"; "form"; "generic"; "grid"; "group"; 182 + "heading"; "img"; "input"; "insertion"; "landmark"; "link"; "list"; 183 + "listitem"; "log"; "main"; "mark"; "marquee"; "math"; "menu"; 184 + "menubar"; "menuitem"; "menuitemcheckbox"; "menuitemradio"; 185 + "meter"; "navigation"; "note"; "option"; "paragraph"; 186 + "presentation"; "progressbar"; "radio"; "range"; "region"; "row"; 187 + "rowgroup"; "scrollbar"; "search"; "section"; "sectionhead"; 188 + "select"; "separator"; "status"; "strong"; "structure"; 189 + "subscript"; "suggestion"; "superscript"; "tab"; "table"; 190 + "tablist"; "tabpanel"; "term"; "time"; "timer"; "toolbar"; 191 + "tooltip"; "treeitem"; "widget"; "window" 192 + |]; 193 + 194 + (* aria-haspopup deprecated for many roles *) 195 + Hashtbl.add tbl "aria-haspopup" [| 196 + "alert"; "alertdialog"; "article"; "associationlist"; 197 + "associationlistitemkey"; "associationlistitemvalue"; "banner"; 198 + "blockquote"; "caption"; "cell"; "checkbox"; "code"; "command"; 199 + "comment"; "complementary"; "composite"; "contentinfo"; 200 + "definition"; "deletion"; "dialog"; "directory"; "document"; 201 + "emphasis"; "feed"; "figure"; "form"; "generic"; "grid"; "group"; 202 + "heading"; "img"; "input"; "insertion"; "landmark"; "list"; 203 + "listbox"; "listitem"; "log"; "main"; "mark"; "marquee"; "math"; 204 + "menu"; "menubar"; "meter"; "navigation"; "note"; "option"; 205 + "paragraph"; "presentation"; "progressbar"; "radio"; "radiogroup"; 206 + "range"; "region"; "row"; "rowgroup"; "scrollbar"; "search"; 207 + "section"; "sectionhead"; "select"; "separator"; "spinbutton"; 208 + "status"; "strong"; "structure"; "subscript"; "suggestion"; 209 + "superscript"; "switch"; "table"; "tablist"; "tabpanel"; "term"; 210 + "time"; "timer"; "toolbar"; "tooltip"; "tree"; "treegrid"; 211 + "widget"; "window" 212 + |]; 213 + 214 + (* aria-invalid deprecated for many roles *) 215 + Hashtbl.add tbl "aria-invalid" [| 216 + "alert"; "alertdialog"; "article"; "associationlist"; 217 + "associationlistitemkey"; "associationlistitemvalue"; "banner"; 218 + "blockquote"; "button"; "caption"; "cell"; "code"; "command"; 219 + "comment"; "complementary"; "composite"; "contentinfo"; 220 + "definition"; "deletion"; "dialog"; "directory"; "document"; 221 + "emphasis"; "feed"; "figure"; "form"; "generic"; "grid"; "group"; 222 + "heading"; "img"; "input"; "insertion"; "landmark"; "link"; "list"; 223 + "listitem"; "log"; "main"; "mark"; "marquee"; "math"; "menu"; 224 + "menubar"; "menuitem"; "menuitemcheckbox"; "menuitemradio"; 225 + "meter"; "navigation"; "note"; "option"; "paragraph"; 226 + "presentation"; "progressbar"; "radio"; "range"; "region"; "row"; 227 + "rowgroup"; "scrollbar"; "search"; "section"; "sectionhead"; 228 + "select"; "separator"; "status"; "strong"; "structure"; 229 + "subscript"; "suggestion"; "superscript"; "tab"; "table"; 230 + "tablist"; "tabpanel"; "term"; "time"; "timer"; "toolbar"; 231 + "tooltip"; "treeitem"; "widget"; "window" 232 + |]; 233 + 234 + (* aria-level deprecated for listitem *) 235 + Hashtbl.add tbl "aria-level" [| "listitem" |]; 236 + 237 + tbl 238 + 239 + (** Split a role attribute value into individual roles. 240 + 241 + The role attribute can contain multiple space-separated role tokens. *) 242 + let split_roles role_value = 243 + let trimmed = String.trim role_value in 244 + if trimmed = "" then [] 245 + else 246 + String.split_on_char ' ' trimmed 247 + |> List.filter (fun s -> String.trim s <> "") 248 + |> List.map String.lowercase_ascii 249 + 250 + (** Get the implicit role for an HTML element. *) 251 + let get_implicit_role element_name attrs = 252 + (* Check for input element with type attribute *) 253 + if element_name = "input" then begin 254 + match List.assoc_opt "type" attrs with 255 + | Some input_type -> 256 + let input_type = String.lowercase_ascii input_type in 257 + Hashtbl.find_opt input_types_with_implicit_role input_type 258 + | None -> Some "textbox" (* default input type is text *) 259 + end 260 + else 261 + Hashtbl.find_opt elements_with_implicit_role element_name 262 + 263 + (** Get all possible implicit roles for an element (for elements like 'th'). 264 + 265 + Note: This function is not currently used but is provided for completeness. *) 266 + let _get_implicit_roles element_name = 267 + match Hashtbl.find_opt elements_with_implicit_roles element_name with 268 + | Some roles -> Array.to_list roles 269 + | None -> 270 + match get_implicit_role element_name [] with 271 + | Some role -> [role] 272 + | None -> [] 273 + 274 + (** Stack node representing an element in the ancestor chain. *) 275 + type stack_node = { 276 + explicit_roles : string list; 277 + implicit_role : string option; 278 + } 279 + 280 + (** Checker state. *) 281 + type state = { 282 + mutable stack : stack_node list; 283 + } 284 + 285 + let create () = { stack = [] } 286 + 287 + let reset state = state.stack <- [] 288 + 289 + (** Check if any ancestor has one of the required roles. *) 290 + let has_required_ancestor_role state required_roles = 291 + List.exists (fun ancestor -> 292 + (* Check explicit roles *) 293 + List.exists (fun role -> 294 + List.mem role required_roles 295 + ) ancestor.explicit_roles 296 + || 297 + (* Check implicit role *) 298 + match ancestor.implicit_role with 299 + | Some implicit_role -> List.mem implicit_role required_roles 300 + | None -> false 301 + ) state.stack 302 + 303 + (** Render a list of roles as a human-readable string. *) 304 + let render_role_set roles = 305 + match roles with 306 + | [] -> "" 307 + | [role] -> "\"" ^ role ^ "\"" 308 + | _ -> 309 + let quoted = List.map (fun r -> "\"" ^ r ^ "\"") roles in 310 + String.concat " or " quoted 311 + 312 + let start_element state ~name ~namespace ~attrs collector = 313 + (* Only process HTML elements *) 314 + match namespace with 315 + | Some _ -> () (* Skip non-HTML elements *) 316 + | None -> 317 + let role_attr = List.assoc_opt "role" attrs in 318 + let aria_label = List.assoc_opt "aria-label" attrs in 319 + let aria_labelledby = List.assoc_opt "aria-labelledby" attrs in 320 + let has_accessible_name = 321 + (match aria_label with Some v -> String.trim v <> "" | None -> false) || 322 + (match aria_labelledby with Some v -> String.trim v <> "" | None -> false) 323 + in 324 + 325 + (* Parse explicit roles from role attribute *) 326 + let explicit_roles = match role_attr with 327 + | Some role_value -> split_roles role_value 328 + | None -> [] 329 + in 330 + 331 + (* Get implicit role for this element *) 332 + let implicit_role = get_implicit_role name attrs in 333 + 334 + (* Validate explicit roles *) 335 + List.iter (fun role -> 336 + (* Check if role is valid *) 337 + if not (Hashtbl.mem valid_aria_roles role) then 338 + Message_collector.add_error collector 339 + ~message:(Printf.sprintf "Invalid ARIA role \"%s\"." role) (); 340 + 341 + (* Check if role cannot be named *) 342 + if Hashtbl.mem roles_which_cannot_be_named role && has_accessible_name then 343 + Message_collector.add_error collector 344 + ~message:(Printf.sprintf 345 + "Elements with role=\"%s\" must not have accessible names (via aria-label or aria-labelledby)." 346 + role) (); 347 + 348 + (* Check for required ancestor roles *) 349 + begin match Hashtbl.find_opt required_role_ancestor_by_descendant role with 350 + | Some required_ancestors -> 351 + if not (has_required_ancestor_role state required_ancestors) then 352 + Message_collector.add_error collector 353 + ~message:(Printf.sprintf 354 + "An element with \"role=%s\" must be contained in, or owned by, an element with the \"role\" value %s." 355 + role 356 + (render_role_set required_ancestors)) () 357 + | None -> () 358 + end; 359 + 360 + (* Check for deprecated ARIA attributes for this role *) 361 + List.iter (fun (attr_name, _attr_value) -> 362 + if String.starts_with ~prefix:"aria-" attr_name then 363 + match Hashtbl.find_opt aria_deprecated_attributes_by_role attr_name with 364 + | Some deprecated_for_roles -> 365 + (* Check if current role is in the deprecated list *) 366 + if Array.mem role deprecated_for_roles then 367 + Message_collector.add_warning collector 368 + ~message:(Printf.sprintf 369 + "The \"%s\" attribute should not be used on any element which has \"role=%s\"." 370 + attr_name role) () 371 + | None -> () 372 + ) attrs 373 + ) explicit_roles; 374 + 375 + (* Push current element onto stack *) 376 + let node = { 377 + explicit_roles; 378 + implicit_role; 379 + } in 380 + state.stack <- node :: state.stack 381 + 382 + let end_element state ~name:_ ~namespace _collector = 383 + (* Only process HTML elements *) 384 + match namespace with 385 + | Some _ -> () (* Skip non-HTML elements *) 386 + | None -> 387 + (* Pop from stack *) 388 + match state.stack with 389 + | _ :: rest -> state.stack <- rest 390 + | [] -> () (* Stack underflow - shouldn't happen in well-formed docs *) 391 + 392 + let characters _state _text _collector = () 393 + 394 + let end_document _state _collector = () 395 + 396 + let checker = (module struct 397 + type nonrec state = state 398 + let create = create 399 + let reset = reset 400 + let start_element = start_element 401 + let end_element = end_element 402 + let characters = characters 403 + let end_document = end_document 404 + end : Checker.S)
+102
lib/html5_checker/specialized/aria_checker.mli
··· 1 + (** ARIA (Accessible Rich Internet Applications) validation checker. 2 + 3 + Validates ARIA roles, required ancestor roles, implicit roles, and 4 + deprecated ARIA attributes according to the WAI-ARIA specification. 5 + 6 + {2 Validation Rules} 7 + 8 + The checker enforces the following ARIA validation rules: 9 + 10 + {3 Valid ARIA Roles} 11 + 12 + All valid WAI-ARIA 1.2 roles are recognized: 13 + 14 + - {b Document structure roles}: article, definition, directory, document, 15 + feed, figure, group, heading, img, list, listitem, math, none, note, 16 + presentation, region, separator, table, term, toolbar, tooltip 17 + - {b Widget roles}: button, checkbox, combobox, dialog, grid, gridcell, 18 + link, listbox, menu, menubar, menuitem, menuitemcheckbox, 19 + menuitemradio, option, progressbar, radio, radiogroup, scrollbar, 20 + slider, spinbutton, switch, tab, tablist, tabpanel, textbox, tree, 21 + treegrid, treeitem 22 + - {b Landmark roles}: banner, complementary, contentinfo, form, main, 23 + navigation, search 24 + - {b Live region roles}: alert, log, marquee, status, timer 25 + - {b Window roles}: alertdialog 26 + - {b Abstract roles} (not for use in content): command, composite, input, 27 + landmark, range, roletype, section, sectionhead, select, structure, 28 + widget, window 29 + 30 + {3 Required Ancestor Roles} 31 + 32 + Certain ARIA roles require specific ancestor roles: 33 + 34 + - [option] requires [listbox] 35 + - [menuitem], [menuitemcheckbox], [menuitemradio] require [menu] or 36 + [menubar] 37 + - [tab] requires [tablist] 38 + - [treeitem] requires [tree] or [group] 39 + - [listitem] requires [list] or [group] 40 + - [cell], [gridcell], [columnheader], [rowheader] require [row] 41 + - [row] requires [grid], [rowgroup], [table], or [treegrid] 42 + - [rowgroup] requires [grid], [table], or [treegrid] 43 + 44 + {3 Roles That Cannot Be Named} 45 + 46 + These roles must not have accessible names (via aria-label or 47 + aria-labelledby): 48 + 49 + - caption, code, deletion, emphasis, generic, insertion, paragraph, 50 + presentation, strong, subscript, superscript 51 + 52 + {3 Implicit ARIA Roles} 53 + 54 + HTML elements have implicit ARIA roles: 55 + 56 + - [<article>] has implicit role [article] 57 + - [<aside>] has implicit role [complementary] 58 + - [<button>] has implicit role [button] 59 + - [<dialog>] has implicit role [dialog] 60 + - [<footer>] has implicit role [contentinfo] 61 + - [<header>] has implicit role [banner] 62 + - [<main>] has implicit role [main] 63 + - [<nav>] has implicit role [navigation] 64 + - And many more... 65 + 66 + {3 Deprecated ARIA Attributes} 67 + 68 + Certain ARIA attributes are deprecated for specific roles: 69 + 70 + - [aria-disabled] is deprecated for: alert, article, banner, cell, 71 + document, feed, figure, heading, img, list, listitem, main, navigation, 72 + region, and many others 73 + - [aria-errormessage] is deprecated for: alert, article, banner, button, 74 + cell, document, link, and many others 75 + - [aria-haspopup] is deprecated for: alert, article, checkbox, listbox, 76 + and many others 77 + - [aria-invalid] is deprecated for: alert, article, button, cell, 78 + document, link, and many others 79 + - [aria-level] is deprecated for: listitem 80 + 81 + {2 Implementation Details} 82 + 83 + The checker maintains a stack of ancestor elements with their roles 84 + (explicit and implicit) to validate required ancestor relationships. 85 + When an element with a role attribute is encountered, the checker: 86 + 87 + 1. Parses the role attribute value (space-separated tokens) 88 + 2. Validates each role against the list of valid roles 89 + 3. Checks if the role requires an ancestor role 90 + 4. Verifies the required ancestor is present in the ancestor stack 91 + 5. Checks for deprecated ARIA attributes on elements with specific roles 92 + 6. Validates that roles which cannot be named do not have aria-label or 93 + aria-labelledby attributes 94 + 95 + @see <https://www.w3.org/TR/wai-aria-1.2/> 96 + WAI-ARIA 1.2 specification 97 + *) 98 + 99 + include Checker.S 100 + 101 + val checker : Checker.t 102 + (** [checker] is a checker instance for validating ARIA roles and attributes. *)
+157
lib/html5_checker/specialized/heading_checker.ml
··· 1 + (** Heading structure validation checker. 2 + 3 + This checker validates that: 4 + - Heading levels don't skip (e.g., h1 to h3) 5 + - Documents have at least one heading 6 + - Multiple h1 usage is noted 7 + - Headings are not empty *) 8 + 9 + (** Checker state tracking heading structure. *) 10 + type state = { 11 + mutable current_level : int option; 12 + mutable h1_count : int; 13 + mutable has_any_heading : bool; 14 + mutable first_heading_checked : bool; 15 + mutable in_heading : string option; 16 + mutable heading_has_text : bool; 17 + } 18 + 19 + let create () = 20 + { 21 + current_level = None; 22 + h1_count = 0; 23 + has_any_heading = false; 24 + first_heading_checked = false; 25 + in_heading = None; 26 + heading_has_text = false; 27 + } 28 + 29 + let reset state = 30 + state.current_level <- None; 31 + state.h1_count <- 0; 32 + state.has_any_heading <- false; 33 + state.first_heading_checked <- false; 34 + state.in_heading <- None; 35 + state.heading_has_text <- false 36 + 37 + (** Extract heading level from tag name (e.g., "h1" -> 1). *) 38 + let heading_level name = 39 + match String.lowercase_ascii name with 40 + | "h1" -> Some 1 41 + | "h2" -> Some 2 42 + | "h3" -> Some 3 43 + | "h4" -> Some 4 44 + | "h5" -> Some 5 45 + | "h6" -> Some 6 46 + | _ -> None 47 + 48 + (** Check if text is effectively empty (only whitespace). *) 49 + let is_empty_text text = 50 + let rec check i = 51 + if i >= String.length text then 52 + true 53 + else 54 + match text.[i] with 55 + | ' ' | '\t' | '\n' | '\r' -> check (i + 1) 56 + | _ -> false 57 + in 58 + check 0 59 + 60 + let start_element state ~name ~namespace:_ ~attrs:_ collector = 61 + match heading_level name with 62 + | Some level -> 63 + state.has_any_heading <- true; 64 + 65 + (* Check if this is the first heading *) 66 + if not state.first_heading_checked then begin 67 + state.first_heading_checked <- true; 68 + if level <> 1 then 69 + Message_collector.add_warning collector 70 + ~message:(Printf.sprintf 71 + "First heading in document is <%s>, should typically be <h1>" 72 + name) 73 + ~code:"first-heading-not-h1" 74 + ~element:name 75 + () 76 + end; 77 + 78 + (* Track h1 count *) 79 + if level = 1 then begin 80 + state.h1_count <- state.h1_count + 1; 81 + if state.h1_count > 1 then 82 + Message_collector.add_warning collector 83 + ~message:"Multiple <h1> elements detected. While valid in HTML5 sectioning content, traditional advice suggests one <h1> per page" 84 + ~code:"multiple-h1" 85 + ~element:name 86 + () 87 + end; 88 + 89 + (* Check for skipped levels *) 90 + begin match state.current_level with 91 + | None -> 92 + state.current_level <- Some level 93 + | Some prev_level -> 94 + let diff = level - prev_level in 95 + if diff > 1 then 96 + Message_collector.add_warning collector 97 + ~message:(Printf.sprintf 98 + "Heading level skipped: <%s> follows <h%d>, skipping %d level%s. This can confuse screen reader users" 99 + name prev_level (diff - 1) (if diff > 2 then "s" else "")) 100 + ~code:"heading-level-skipped" 101 + ~element:name 102 + (); 103 + state.current_level <- Some level 104 + end; 105 + 106 + (* Track that we're in a heading to check for empty content *) 107 + state.in_heading <- Some name; 108 + state.heading_has_text <- false 109 + 110 + | None -> 111 + (* Not a heading element *) 112 + () 113 + 114 + let end_element state ~name ~namespace:_ collector = 115 + match state.in_heading with 116 + | Some heading when heading = name -> 117 + (* Exiting the heading we're tracking *) 118 + if not state.heading_has_text then 119 + Message_collector.add_error collector 120 + ~message:(Printf.sprintf 121 + "Heading <%s> is empty or contains only whitespace. Empty headings are problematic for screen readers" 122 + name) 123 + ~code:"empty-heading" 124 + ~element:name 125 + (); 126 + state.in_heading <- None; 127 + state.heading_has_text <- false 128 + | _ -> 129 + () 130 + 131 + let characters state text _collector = 132 + (* If we're inside a heading, check if this text is non-whitespace *) 133 + match state.in_heading with 134 + | Some _ -> 135 + if not (is_empty_text text) then 136 + state.heading_has_text <- true 137 + | None -> 138 + () 139 + 140 + let end_document state collector = 141 + (* Check if document has any headings *) 142 + if not state.has_any_heading then 143 + Message_collector.add_warning collector 144 + ~message:"Document contains no heading elements (h1-h6). Headings provide important document structure for accessibility" 145 + ~code:"no-headings" 146 + () 147 + 148 + let checker = (module struct 149 + type nonrec state = state 150 + 151 + let create = create 152 + let reset = reset 153 + let start_element = start_element 154 + let end_element = end_element 155 + let characters = characters 156 + let end_document = end_document 157 + end : Checker.S)
+12
lib/html5_checker/specialized/heading_checker.mli
··· 1 + (** Heading structure validation checker. 2 + 3 + Validates: 4 + - Proper heading level hierarchy (no skipped levels) 5 + - Document should have at least one heading 6 + - Multiple h1 usage patterns 7 + - Headings should not be empty *) 8 + 9 + include Checker.S 10 + 11 + val checker : Checker.t 12 + (** [checker] is a checker instance for validating heading structure. *)
+111
lib/html5_checker/specialized/language_checker.ml
··· 1 + (** Language attribute validation checker. 2 + 3 + Validates language attributes. *) 4 + 5 + (** Checker state tracking language attributes. *) 6 + type state = { 7 + mutable html_element_seen : bool; 8 + mutable html_has_lang : bool; 9 + } 10 + 11 + let create () = 12 + { 13 + html_element_seen = false; 14 + html_has_lang = false; 15 + } 16 + 17 + let reset state = 18 + state.html_element_seen <- false; 19 + state.html_has_lang <- false 20 + 21 + (** Get attribute value from attribute list. *) 22 + let get_attr attrs name = 23 + try Some (List.assoc name attrs) 24 + with Not_found -> None 25 + 26 + (** Validate language attribute. *) 27 + let validate_lang_attr value ~location ~element collector = 28 + match Dt_language.Language_or_empty.validate value with 29 + | Ok () -> () 30 + | Error msg -> 31 + Message_collector.add_error collector 32 + ~message:(Printf.sprintf "Invalid lang attribute: %s" msg) 33 + ~code:"invalid-lang" 34 + ?location 35 + ~element 36 + ~attribute:"lang" 37 + () 38 + 39 + (** Check if lang and xml:lang match. *) 40 + let check_lang_xmllang_match ~lang ~xmllang ~location ~element collector = 41 + if lang <> xmllang then 42 + Message_collector.add_warning collector 43 + ~message:(Printf.sprintf 44 + "lang attribute '%s' does not match xml:lang attribute '%s'" lang xmllang) 45 + ~code:"lang-xmllang-mismatch" 46 + ?location 47 + ~element 48 + () 49 + 50 + (** Process language attributes. *) 51 + let process_language_attrs state ~element ~namespace ~attrs ~location collector = 52 + let lang_opt = get_attr attrs "lang" in 53 + let xmllang_opt = get_attr attrs "xml:lang" in 54 + 55 + (* Check if this is the html element *) 56 + if element = "html" && namespace = None then begin 57 + state.html_element_seen <- true; 58 + state.html_has_lang <- lang_opt <> None 59 + end; 60 + 61 + (* Validate lang attribute *) 62 + begin match lang_opt with 63 + | Some lang -> 64 + validate_lang_attr lang ~location ~element collector 65 + | None -> () 66 + end; 67 + 68 + (* Validate xml:lang attribute *) 69 + begin match xmllang_opt with 70 + | Some xmllang -> 71 + validate_lang_attr xmllang ~location ~element collector 72 + | None -> () 73 + end; 74 + 75 + (* Check that lang and xml:lang match if both present *) 76 + begin match lang_opt, xmllang_opt with 77 + | Some lang, Some xmllang -> 78 + check_lang_xmllang_match ~lang ~xmllang ~location ~element collector 79 + | _ -> () 80 + end 81 + 82 + let start_element state ~name ~namespace ~attrs collector = 83 + let location = None in 84 + process_language_attrs state ~element:name ~namespace ~attrs ~location collector 85 + 86 + let end_element _state ~name:_ ~namespace:_ _collector = 87 + () 88 + 89 + let characters _state _text _collector = 90 + () 91 + 92 + let end_document state collector = 93 + (* Warn if html element lacks lang attribute *) 94 + if state.html_element_seen && not state.html_has_lang then 95 + Message_collector.add_warning collector 96 + ~message:"The <html> element should have a lang attribute to specify \ 97 + the document's primary language" 98 + ~code:"missing-lang-on-html" 99 + ~element:"html" 100 + () 101 + 102 + let checker = (module struct 103 + type nonrec state = state 104 + 105 + let create = create 106 + let reset = reset 107 + let start_element = start_element 108 + let end_element = end_element 109 + let characters = characters 110 + let end_document = end_document 111 + end : Checker.S)
+18
lib/html5_checker/specialized/language_checker.mli
··· 1 + (** Language attribute validation checker. 2 + 3 + Validates: 4 + - lang attribute values are valid BCP 47 tags 5 + - xml:lang matches lang when both present 6 + - Document has a lang attribute on root element 7 + 8 + This checker ensures proper language markup: 9 + - lang attribute values are validated using BCP 47 format 10 + - When both lang and xml:lang are present, they must match 11 + - Warning if <html> element lacks lang attribute 12 + - Empty lang="" is valid (indicates unknown language) 13 + - Primary language subtag should be valid *) 14 + 15 + include Checker.S 16 + 17 + val checker : Checker.t 18 + (** [checker] is a checker instance for validating language attributes. *)
+304
lib/html5_checker/specialized/microdata_checker.ml
··· 1 + (** Microdata validation checker. 2 + 3 + Validates HTML5 microdata attributes. *) 4 + 5 + (** Information about an itemscope. *) 6 + type item_scope = { 7 + element : string; 8 + location : Message.location option; [@warning "-69"] 9 + itemtype : string option; [@warning "-69"] 10 + itemid : string option; [@warning "-69"] 11 + itemref : string list; [@warning "-69"] 12 + } 13 + 14 + (** Information about an itemref reference. *) 15 + type itemref_reference = { 16 + referring_element : string; 17 + referenced_ids : string list; 18 + location : Message.location option; 19 + } 20 + 21 + (** Checker state tracking microdata. *) 22 + type state = { 23 + mutable scope_stack : item_scope list; 24 + mutable itemref_references : itemref_reference list; 25 + mutable all_ids : (string, unit) Hashtbl.t; [@warning "-69"] 26 + mutable html_element_seen : bool; [@warning "-69"] 27 + } 28 + 29 + let create () = 30 + { 31 + scope_stack = []; 32 + itemref_references = []; 33 + all_ids = Hashtbl.create 64; 34 + html_element_seen = false; 35 + } 36 + 37 + let reset state = 38 + state.scope_stack <- []; 39 + state.itemref_references <- []; 40 + Hashtbl.clear state.all_ids; 41 + state.html_element_seen <- false 42 + 43 + (** Split whitespace-separated values. *) 44 + let split_whitespace value = 45 + let rec split acc start i = 46 + if i >= String.length value then 47 + if i > start then 48 + (String.sub value start (i - start)) :: acc 49 + else 50 + acc 51 + else 52 + match value.[i] with 53 + | ' ' | '\t' | '\n' | '\r' -> 54 + let acc' = 55 + if i > start then 56 + (String.sub value start (i - start)) :: acc 57 + else 58 + acc 59 + in 60 + split acc' (i + 1) (i + 1) 61 + | _ -> 62 + split acc start (i + 1) 63 + in 64 + List.rev (split [] 0 0) 65 + 66 + (** Check if a string is a valid URL (contains a colon). *) 67 + let is_url s = 68 + String.contains s ':' 69 + 70 + (** Check if itemprop value is valid. *) 71 + let validate_itemprop_value value = 72 + if String.length value = 0 then 73 + Error "itemprop value must not be empty" 74 + else if not (is_url value) && String.contains value ':' then 75 + Error (Printf.sprintf 76 + "itemprop value '%s' contains a colon but is not a URL" value) 77 + else 78 + Ok () 79 + 80 + (** Check if element is inside an itemscope or referenced by itemref. *) 81 + let is_property_element state = 82 + state.scope_stack <> [] 83 + 84 + (** Get attributes from attribute list. *) 85 + let get_attr attrs name = 86 + try Some (List.assoc name attrs) 87 + with Not_found -> None 88 + 89 + (** Process microdata attributes. *) 90 + let process_microdata_attrs state ~element ~attrs ~location collector = 91 + let has_itemscope = List.mem_assoc "itemscope" attrs in 92 + let itemtype_opt = get_attr attrs "itemtype" in 93 + let itemid_opt = get_attr attrs "itemid" in 94 + let itemref_opt = get_attr attrs "itemref" in 95 + let itemprop_opt = get_attr attrs "itemprop" in 96 + 97 + (* Check itemid requires itemscope and itemtype *) 98 + begin match itemid_opt with 99 + | Some _itemid -> 100 + if not has_itemscope then 101 + Message_collector.add_error collector 102 + ~message:"itemid attribute requires itemscope attribute" 103 + ~code:"microdata-itemid-without-itemscope" 104 + ?location 105 + ~element 106 + ~attribute:"itemid" 107 + (); 108 + if itemtype_opt = None then 109 + Message_collector.add_error collector 110 + ~message:"itemid attribute requires itemtype attribute" 111 + ~code:"microdata-itemid-without-itemtype" 112 + ?location 113 + ~element 114 + ~attribute:"itemid" 115 + () 116 + | None -> () 117 + end; 118 + 119 + (* Check itemref requires itemscope *) 120 + begin match itemref_opt with 121 + | Some itemref_value -> 122 + if not has_itemscope then 123 + Message_collector.add_error collector 124 + ~message:"itemref attribute requires itemscope attribute" 125 + ~code:"microdata-itemref-without-itemscope" 126 + ?location 127 + ~element 128 + ~attribute:"itemref" 129 + () 130 + else begin 131 + (* Collect itemref references for later validation *) 132 + let ids = split_whitespace itemref_value in 133 + state.itemref_references <- { 134 + referring_element = element; 135 + referenced_ids = ids; 136 + location; 137 + } :: state.itemref_references 138 + end 139 + | None -> () 140 + end; 141 + 142 + (* Check itemtype requires itemscope *) 143 + begin match itemtype_opt with 144 + | Some _itemtype -> 145 + if not has_itemscope then 146 + Message_collector.add_error collector 147 + ~message:"itemtype attribute requires itemscope attribute" 148 + ~code:"microdata-itemtype-without-itemscope" 149 + ?location 150 + ~element 151 + ~attribute:"itemtype" 152 + () 153 + | None -> () 154 + end; 155 + 156 + (* Check itemprop value validity *) 157 + begin match itemprop_opt with 158 + | Some itemprop_value -> 159 + let props = split_whitespace itemprop_value in 160 + List.iter (fun prop -> 161 + match validate_itemprop_value prop with 162 + | Ok () -> () 163 + | Error msg -> 164 + Message_collector.add_error collector 165 + ~message:msg 166 + ~code:"microdata-invalid-itemprop" 167 + ?location 168 + ~element 169 + ~attribute:"itemprop" 170 + () 171 + ) props; 172 + 173 + (* Check itemprop can only appear on property elements *) 174 + if not (is_property_element state) then 175 + Message_collector.add_error collector 176 + ~message:"itemprop attribute can only appear on elements that are \ 177 + properties of an item (descendant of itemscope or referenced by itemref)" 178 + ~code:"microdata-itemprop-outside-scope" 179 + ?location 180 + ~element 181 + ~attribute:"itemprop" 182 + () 183 + | None -> () 184 + end; 185 + 186 + (* If this element has itemscope, push it onto the stack *) 187 + if has_itemscope then begin 188 + let itemref = match itemref_opt with 189 + | Some v -> split_whitespace v 190 + | None -> [] 191 + in 192 + let scope = { 193 + element; 194 + location; 195 + itemtype = itemtype_opt; 196 + itemid = itemid_opt; 197 + itemref; 198 + } in 199 + state.scope_stack <- scope :: state.scope_stack 200 + end 201 + 202 + (** Track IDs for itemref validation. *) 203 + let track_id state attrs = 204 + match get_attr attrs "id" with 205 + | Some id -> 206 + if String.length id > 0 then 207 + Hashtbl.replace state.all_ids id () 208 + | None -> () 209 + 210 + (** Detect itemref cycles using depth-first search. *) 211 + let detect_itemref_cycles state collector = 212 + (* Build adjacency list from itemref references *) 213 + let graph = Hashtbl.create 32 in 214 + List.iter (fun ref -> 215 + Hashtbl.replace graph ref.referring_element ref.referenced_ids 216 + ) state.itemref_references; 217 + 218 + (* DFS to detect cycles *) 219 + let rec visit visited stack node = 220 + if List.mem node stack then 221 + (* Found a cycle *) 222 + Some (node :: stack) 223 + else if List.mem node visited then 224 + None 225 + else 226 + match Hashtbl.find_opt graph node with 227 + | None -> None 228 + | Some neighbors -> 229 + let stack' = node :: stack in 230 + let rec check_neighbors = function 231 + | [] -> None 232 + | neighbor :: rest -> 233 + match visit visited stack' neighbor with 234 + | Some cycle -> Some cycle 235 + | None -> check_neighbors rest 236 + in 237 + check_neighbors neighbors 238 + in 239 + 240 + (* Check all nodes *) 241 + let rec check_all_nodes visited nodes = 242 + match nodes with 243 + | [] -> () 244 + | node :: rest -> 245 + begin match visit visited [] node with 246 + | Some cycle -> 247 + let cycle_str = String.concat " -> " (List.rev cycle) in 248 + Message_collector.add_error collector 249 + ~message:(Printf.sprintf "itemref cycle detected: %s" cycle_str) 250 + ~code:"microdata-itemref-cycle" 251 + () 252 + | None -> () 253 + end; 254 + check_all_nodes (node :: visited) rest 255 + in 256 + 257 + let all_nodes = Hashtbl.to_seq_keys graph |> List.of_seq in 258 + check_all_nodes [] all_nodes 259 + 260 + let start_element state ~name ~namespace:_ ~attrs collector = 261 + let location = None in 262 + track_id state attrs; 263 + process_microdata_attrs state ~element:name ~attrs ~location collector 264 + 265 + let end_element state ~name ~namespace:_ _collector = 266 + (* Pop itemscope from stack if this element had one *) 267 + match state.scope_stack with 268 + | scope :: rest when scope.element = name -> 269 + state.scope_stack <- rest 270 + | _ -> () 271 + 272 + let characters _state _text _collector = 273 + () 274 + 275 + let end_document state collector = 276 + (* Check all itemref references point to existing IDs *) 277 + List.iter (fun ref -> 278 + List.iter (fun id -> 279 + if not (Hashtbl.mem state.all_ids id) then 280 + Message_collector.add_error collector 281 + ~message:(Printf.sprintf 282 + "itemref on <%s> refers to ID '%s' which does not exist" 283 + ref.referring_element id) 284 + ~code:"microdata-itemref-dangling" 285 + ?location:ref.location 286 + ~element:ref.referring_element 287 + ~attribute:"itemref" 288 + () 289 + ) ref.referenced_ids 290 + ) state.itemref_references; 291 + 292 + (* Detect itemref cycles *) 293 + detect_itemref_cycles state collector 294 + 295 + let checker = (module struct 296 + type nonrec state = state 297 + 298 + let create = create 299 + let reset = reset 300 + let start_element = start_element 301 + let end_element = end_element 302 + let characters = characters 303 + let end_document = end_document 304 + end : Checker.S)
+18
lib/html5_checker/specialized/microdata_checker.mli
··· 1 + (** Microdata validation checker. 2 + 3 + Validates HTML5 microdata (itemscope, itemtype, itemprop, itemid, itemref). 4 + 5 + This checker verifies that microdata attributes are used correctly: 6 + - itemprop can only appear on elements that are properties of an item 7 + (descendant of itemscope or referenced by itemref) 8 + - itemid requires both itemscope and itemtype 9 + - itemref requires itemscope 10 + - itemtype requires itemscope 11 + - itemref values must reference existing IDs 12 + - Detects itemref cycles (A references B, B references A) 13 + - itemprop values must be valid property names (no colons unless URL) *) 14 + 15 + include Checker.S 16 + 17 + val checker : Checker.t 18 + (** [checker] is a checker instance for validating microdata. *)
+833
lib/html5_checker/specialized/table_checker.ml
··· 1 + (** Table structure validation checker implementation. 2 + 3 + This module implements comprehensive table structure validation including 4 + cell overlap detection, span validation, and structural integrity checks. *) 5 + 6 + (** HTML namespace constant *) 7 + let html_ns = "http://www.w3.org/1999/xhtml" 8 + 9 + (** Maximum allowed colspan value per HTML5 spec *) 10 + let max_colspan = 1000 11 + 12 + (** Maximum allowed rowspan value per HTML5 spec *) 13 + let max_rowspan = 65534 14 + 15 + (** Special rowspan value meaning "span to end of row group" *) 16 + let rowspan_zero_magic = max_rowspan 17 + 18 + (** {1 Cell Representation} *) 19 + 20 + (** A table cell with positioning information *) 21 + type cell = { 22 + mutable left : int; 23 + (** Column in which this cell starts (zero-indexed) *) 24 + mutable right : int; 25 + (** First column into which this cell does not span *) 26 + mutable bottom : int; 27 + (** First row onto which this cell does not span (or rowspan_zero_magic) *) 28 + headers : string list; 29 + (** IDs referenced by the headers attribute *) 30 + is_header : bool; 31 + (** Whether this is a th element *) 32 + element_name : string; 33 + (** "td" or "th" *) 34 + } 35 + 36 + (** Create a cell from colspan and rowspan values *) 37 + let make_cell ~colspan ~rowspan ~headers ~is_header collector = 38 + let colspan = 39 + if colspan > max_colspan then ( 40 + Message_collector.add_error collector 41 + ~message: 42 + (Printf.sprintf 43 + {|The value of the "colspan" attribute must be less than or equal to %d.|} 44 + max_colspan) 45 + (); 46 + max_colspan) 47 + else colspan 48 + in 49 + let rowspan = 50 + if rowspan > max_rowspan then ( 51 + Message_collector.add_error collector 52 + ~message: 53 + (Printf.sprintf 54 + {|The value of the "rowspan" attribute must be less than or equal to %d.|} 55 + max_rowspan) 56 + (); 57 + max_rowspan) 58 + else rowspan 59 + in 60 + { 61 + left = 0; 62 + right = colspan; 63 + bottom = (if rowspan = 0 then rowspan_zero_magic else rowspan); 64 + headers; 65 + is_header; 66 + element_name = (if is_header then "th" else "td"); 67 + } 68 + 69 + (** Set the absolute position of a cell *) 70 + let set_cell_position cell ~row ~col = 71 + cell.left <- col; 72 + cell.right <- cell.right + col; 73 + if cell.bottom <> rowspan_zero_magic then cell.bottom <- cell.bottom + row 74 + 75 + (** Check if a cell should be removed from the active set *) 76 + let should_cull_cell cell ~row = row >= cell.bottom 77 + 78 + (** Check if two cells overlap horizontally *) 79 + let cells_overlap_horizontally cell1 cell2 = 80 + not (cell2.right <= cell1.left || cell1.right <= cell2.left) 81 + 82 + (** Emit error for horizontal cell overlap *) 83 + let err_on_horizontal_overlap cell1 cell2 collector = 84 + if cells_overlap_horizontally cell1 cell2 then ( 85 + Message_collector.add_error collector 86 + ~message:"Table cell is overlapped by later table cell." (); 87 + Message_collector.add_error collector 88 + ~message:"Table cell overlaps an earlier table cell." ()) 89 + 90 + (** Check if cell spans past end of row group *) 91 + let err_if_not_rowspan_zero cell ~row_group_type collector = 92 + if cell.bottom <> rowspan_zero_magic then 93 + let group_desc = 94 + match row_group_type with 95 + | None -> "implicit row group" 96 + | Some t -> Printf.sprintf {|row group established by a "%s" element|} t 97 + in 98 + Message_collector.add_error collector 99 + ~message: 100 + (Printf.sprintf 101 + "Table cell spans past the end of its %s; clipped to the end of \ 102 + the row group." 103 + group_desc) 104 + () 105 + 106 + (** {1 Column Range Tracking} *) 107 + 108 + (** A contiguous range of columns without cells *) 109 + type column_range = { 110 + element : string; 111 + (** Element that established this range (col/colgroup/td/th) *) 112 + mutable left : int; 113 + (** Leftmost column in range *) 114 + mutable right : int; 115 + (** First column to right not in range *) 116 + mutable next : column_range option; 117 + (** Next range in linked list *) 118 + } 119 + 120 + (** Create a column range *) 121 + let make_column_range ~element ~left ~right = 122 + { element; left; right; next = None } 123 + 124 + (** Check if column range contains a single column *) 125 + let is_single_col range = range.left + 1 = range.right 126 + 127 + (** Test if a column hits a range (-1=left, 0=in, 1=right) *) 128 + let hits_column range column = 129 + if column < range.left then -1 130 + else if column >= range.right then 1 131 + else 0 132 + 133 + (** Remove a column from a range, returning the new range(s) *) 134 + let remove_column range column = 135 + if is_single_col range then None 136 + else if column = range.left then ( 137 + range.left <- range.left + 1; 138 + Some range) 139 + else if column + 1 = range.right then ( 140 + range.right <- range.right - 1; 141 + Some range) 142 + else 143 + (* Split into two ranges *) 144 + let created = make_column_range ~element:range.element ~left:(column + 1) ~right:range.right in 145 + created.next <- range.next; 146 + range.next <- Some created; 147 + range.right <- column; 148 + Some created 149 + 150 + (** {1 Row Group State} *) 151 + 152 + (** State for a row group (explicit or implicit) *) 153 + type row_group = { 154 + mutable current_row : int; 155 + (** Current row index within this group *) 156 + mutable insertion_point : int; 157 + (** Column position for next cell insertion *) 158 + mutable next_old_cell : int; 159 + (** Index into cells_on_current_row *) 160 + mutable row_had_cells : bool; 161 + (** Whether current row has any cells *) 162 + cells_in_effect : ((int * int), cell) Hashtbl.t; 163 + (** Cells from previous rows still spanning down, keyed by (bottom, left) *) 164 + mutable cells_on_current_row : cell array; 165 + (** Cells from previous rows affecting current row, sorted by left *) 166 + row_group_type : string option; 167 + (** Name of row group element (thead/tbody/tfoot) or None for implicit *) 168 + } 169 + 170 + (** Create a new row group *) 171 + let make_row_group ~row_group_type = 172 + { 173 + current_row = -1; 174 + insertion_point = 0; 175 + next_old_cell = 0; 176 + row_had_cells = false; 177 + cells_in_effect = Hashtbl.create 16; 178 + cells_on_current_row = [||]; 179 + row_group_type; 180 + } 181 + 182 + (** Start a new row in the row group *) 183 + let start_row_in_group group = 184 + group.current_row <- group.current_row + 1; 185 + group.insertion_point <- 0; 186 + group.next_old_cell <- 0; 187 + group.row_had_cells <- false; 188 + (* Collect cells still in effect and sort by left column *) 189 + let active_cells : cell list = 190 + Hashtbl.fold 191 + (fun _ (cell : cell) acc -> if not (should_cull_cell cell ~row:group.current_row) then cell :: acc else acc) 192 + group.cells_in_effect [] 193 + in 194 + let sorted = List.sort (fun (c1 : cell) (c2 : cell) -> Int.compare c1.left c2.left) active_cells in 195 + group.cells_on_current_row <- Array.of_list sorted 196 + 197 + (** Find the next available insertion point *) 198 + let rec find_insertion_point group = 199 + if group.next_old_cell < Array.length group.cells_on_current_row then 200 + let other = group.cells_on_current_row.(group.next_old_cell) in 201 + if group.insertion_point < other.left then () 202 + else ( 203 + let right = other.right in 204 + if right > group.insertion_point then group.insertion_point <- right; 205 + group.next_old_cell <- group.next_old_cell + 1; 206 + find_insertion_point group) 207 + 208 + (** Add a cell to the row group *) 209 + let add_cell_to_group group cell collector = 210 + group.row_had_cells <- true; 211 + find_insertion_point group; 212 + set_cell_position cell ~row:group.current_row ~col:group.insertion_point; 213 + 214 + (* Check for overlaps with cells from previous rows *) 215 + for i = group.next_old_cell to Array.length group.cells_on_current_row - 1 do 216 + err_on_horizontal_overlap group.cells_on_current_row.(i) cell collector 217 + done; 218 + 219 + (* Add to cells in effect if it spans beyond current row *) 220 + if cell.bottom > group.current_row + 1 then 221 + Hashtbl.add group.cells_in_effect (cell.bottom, cell.left) cell; 222 + 223 + group.insertion_point <- cell.right 224 + 225 + (** End the current row *) 226 + let end_row_in_group group collector = 227 + (if not group.row_had_cells then 228 + let group_desc = 229 + match group.row_group_type with 230 + | None -> "an implicit row group" 231 + | Some t -> Printf.sprintf {|a row group established by a "%s" element|} t 232 + in 233 + Message_collector.add_error collector 234 + ~message: 235 + (Printf.sprintf {|Row %d of %s has no cells beginning on it.|} 236 + (group.current_row + 1) group_desc) 237 + ()); 238 + 239 + find_insertion_point group; 240 + group.cells_on_current_row <- [||]; 241 + 242 + (* Cull cells that don't span to next row *) 243 + let to_remove = ref [] in 244 + Hashtbl.iter 245 + (fun key cell -> 246 + if should_cull_cell cell ~row:(group.current_row + 1) then to_remove := key :: !to_remove) 247 + group.cells_in_effect; 248 + List.iter (Hashtbl.remove group.cells_in_effect) !to_remove; 249 + 250 + (* Return the final insertion point (row width) *) 251 + group.insertion_point 252 + 253 + (** End the row group *) 254 + let end_row_group group collector = 255 + Hashtbl.iter 256 + (fun _ cell -> err_if_not_rowspan_zero cell ~row_group_type:group.row_group_type collector) 257 + group.cells_in_effect 258 + 259 + (** {1 Table State} *) 260 + 261 + (** Parser state within a table *) 262 + type table_state = 263 + | InTableAtStart 264 + | InTableAtPotentialRowGroupStart 265 + | InColgroup 266 + | InColInColgroup 267 + | InColInImplicitGroup 268 + | InRowGroup 269 + | InRowInRowGroup 270 + | InCellInRowGroup 271 + | InRowInImplicitRowGroup 272 + | InImplicitRowGroup 273 + | InCellInImplicitRowGroup 274 + | InTableColsSeen 275 + 276 + (** State for a single table *) 277 + type table = { 278 + mutable state : table_state; 279 + mutable suppressed_starts : int; 280 + (** Count of nested suppressed elements *) 281 + mutable hard_width : bool; 282 + (** Whether column count was set by col/colgroup *) 283 + mutable column_count : int; 284 + (** Established column count (-1 if not set) *) 285 + mutable real_column_count : int; 286 + (** Actual maximum column count seen *) 287 + mutable pending_colgroup_span : int; 288 + (** Span for colgroup without col children *) 289 + header_ids : (string, unit) Hashtbl.t; 290 + (** IDs of th elements *) 291 + cells_with_headers : cell list ref; 292 + (** Cells with headers attribute *) 293 + mutable current_row_group : row_group option; 294 + (** Current row group *) 295 + mutable first_col_range : column_range option; 296 + (** Head of column range list *) 297 + mutable last_col_range : column_range option; 298 + (** Tail of column range list *) 299 + mutable current_col_range : column_range option; 300 + (** Current range being inspected *) 301 + mutable previous_col_range : column_range option; 302 + (** Previous range inspected *) 303 + } 304 + 305 + (** Create a new table *) 306 + let make_table () = 307 + { 308 + state = InTableAtStart; 309 + suppressed_starts = 0; 310 + hard_width = false; 311 + column_count = -1; 312 + real_column_count = 0; 313 + pending_colgroup_span = 0; 314 + header_ids = Hashtbl.create 16; 315 + cells_with_headers = ref []; 316 + current_row_group = None; 317 + first_col_range = None; 318 + last_col_range = None; 319 + current_col_range = None; 320 + previous_col_range = None; 321 + } 322 + 323 + (** Append a column range to the list *) 324 + let append_column_range table range = 325 + match table.last_col_range with 326 + | None -> 327 + table.first_col_range <- Some range; 328 + table.last_col_range <- Some range 329 + | Some last -> 330 + last.next <- Some range; 331 + table.last_col_range <- Some range 332 + 333 + (** Report a cell back to table for column tracking *) 334 + let report_cell_to_table table (cell : cell) = 335 + let left = cell.left in 336 + let right = cell.right in 337 + 338 + (* Check if cell extends past known columns *) 339 + if right > table.real_column_count then ( 340 + if left = table.real_column_count then ( 341 + (* Entirely past existing columns *) 342 + if left + 1 <> right then 343 + append_column_range table 344 + (make_column_range ~element:cell.element_name ~left:(left + 1) ~right); 345 + table.real_column_count <- right) 346 + else ( 347 + (* Partially past existing columns *) 348 + append_column_range table 349 + (make_column_range ~element:cell.element_name ~left:table.real_column_count ~right); 350 + table.real_column_count <- right)); 351 + 352 + (* Track column usage *) 353 + let rec process_ranges () = 354 + match table.current_col_range with 355 + | None -> () 356 + | Some range -> 357 + let hit = hits_column range left in 358 + if hit = 0 then ( 359 + (* Column hits this range - remove it *) 360 + match remove_column range left with 361 + | None -> 362 + (* Range destroyed *) 363 + if Option.is_some table.previous_col_range then 364 + (Option.get table.previous_col_range).next <- range.next; 365 + if table.first_col_range = Some range then table.first_col_range <- range.next; 366 + if table.last_col_range = Some range then table.last_col_range <- table.previous_col_range; 367 + table.current_col_range <- range.next 368 + | Some new_range -> 369 + if table.last_col_range = Some range then table.last_col_range <- Some new_range; 370 + table.current_col_range <- Some new_range) 371 + else if hit = -1 then 372 + () 373 + else ( 374 + (* hit = 1, try next range *) 375 + table.previous_col_range <- Some range; 376 + table.current_col_range <- range.next; 377 + process_ranges ()) 378 + in 379 + process_ranges () 380 + 381 + (** {1 Attribute Parsing} *) 382 + 383 + (** Parse a non-negative integer attribute, returning 1 if absent or invalid *) 384 + let parse_non_negative_int attrs name = 385 + match List.assoc_opt name attrs with 386 + | None -> 1 387 + | Some v -> ( 388 + try 389 + let n = int_of_string v in 390 + if n >= 0 then n else 1 391 + with Failure _ -> 1) 392 + 393 + (** Parse a positive integer attribute, returning 1 if absent or invalid *) 394 + let parse_positive_int attrs name = 395 + match List.assoc_opt name attrs with 396 + | None -> 1 397 + | Some v -> ( 398 + try 399 + let n = int_of_string v in 400 + if n > 0 then n else 1 401 + with Failure _ -> 1) 402 + 403 + (** Parse the headers attribute into a list of IDs *) 404 + let parse_headers attrs = 405 + match List.assoc_opt "headers" attrs with 406 + | None -> [] 407 + | Some v -> 408 + let parts = String.split_on_char ' ' v in 409 + List.filter (fun s -> String.length s > 0) parts 410 + 411 + (** Parse span attribute, clamping to max_colspan *) 412 + let parse_span attrs collector = 413 + let span = parse_non_negative_int attrs "span" in 414 + if span > max_colspan then ( 415 + Message_collector.add_error collector 416 + ~message: 417 + (Printf.sprintf {|The value of the "span" attribute must be less than or equal to %d.|} 418 + max_colspan) 419 + (); 420 + max_colspan) 421 + else span 422 + 423 + (** {1 Table Event Handlers} *) 424 + 425 + (** Check if we should suppress the start event *) 426 + let need_suppress_start table = 427 + if table.suppressed_starts > 0 then ( 428 + table.suppressed_starts <- table.suppressed_starts + 1; 429 + true) 430 + else false 431 + 432 + (** Check if we should suppress the end event *) 433 + let need_suppress_end table = 434 + if table.suppressed_starts > 0 then ( 435 + table.suppressed_starts <- table.suppressed_starts - 1; 436 + true) 437 + else false 438 + 439 + (** Start a row group *) 440 + let start_row_group table local_name collector = 441 + if need_suppress_start table then () 442 + else 443 + match table.state with 444 + | InImplicitRowGroup -> ( 445 + match table.current_row_group with 446 + | Some group -> 447 + end_row_group group collector; 448 + table.current_row_group <- Some (make_row_group ~row_group_type:(Some local_name)); 449 + table.state <- InRowGroup 450 + | None -> failwith "Bug: InImplicitRowGroup but no current row group") 451 + | InTableAtStart | InTableColsSeen | InTableAtPotentialRowGroupStart -> 452 + table.current_row_group <- Some (make_row_group ~row_group_type:(Some local_name)); 453 + table.state <- InRowGroup 454 + | _ -> table.suppressed_starts <- 1 455 + 456 + (** End a row group *) 457 + let end_row_group_handler table collector = 458 + if need_suppress_end table then () 459 + else 460 + match table.state with 461 + | InRowGroup -> ( 462 + match table.current_row_group with 463 + | Some group -> 464 + end_row_group group collector; 465 + table.current_row_group <- None; 466 + table.state <- InTableAtPotentialRowGroupStart 467 + | None -> failwith "Bug: InRowGroup but no current row group") 468 + | _ -> failwith "Bug: end_row_group in wrong state" 469 + 470 + (** Start a row *) 471 + let start_row table collector = 472 + if need_suppress_start table then () 473 + else 474 + match table.state with 475 + | InTableAtStart | InTableColsSeen | InTableAtPotentialRowGroupStart -> 476 + table.current_row_group <- Some (make_row_group ~row_group_type:None); 477 + table.state <- InRowInImplicitRowGroup; 478 + table.current_col_range <- table.first_col_range; 479 + table.previous_col_range <- None; 480 + (match table.current_row_group with 481 + | Some group -> start_row_in_group group 482 + | None -> failwith "Bug: just created row group") 483 + | InImplicitRowGroup -> 484 + table.state <- InRowInImplicitRowGroup; 485 + table.current_col_range <- table.first_col_range; 486 + table.previous_col_range <- None; 487 + (match table.current_row_group with 488 + | Some group -> start_row_in_group group 489 + | None -> failwith "Bug: InImplicitRowGroup but no row group") 490 + | InRowGroup -> 491 + table.state <- InRowInRowGroup; 492 + table.current_col_range <- table.first_col_range; 493 + table.previous_col_range <- None; 494 + (match table.current_row_group with 495 + | Some group -> start_row_in_group group 496 + | None -> failwith "Bug: InRowGroup but no row group") 497 + | _ -> table.suppressed_starts <- 1 498 + 499 + (** End a row *) 500 + let end_row table collector = 501 + if need_suppress_end table then () 502 + else 503 + match table.state with 504 + | InRowInRowGroup -> 505 + table.state <- InRowGroup; 506 + (match table.current_row_group with 507 + | Some group -> 508 + let row_width = end_row_in_group group collector in 509 + (* Check row width against column count *) 510 + if table.hard_width then ( 511 + if row_width > table.column_count then 512 + Message_collector.add_error collector 513 + ~message: 514 + (Printf.sprintf 515 + {|A table row was %d columns wide and exceeded the column count established using column markup (%d).|} 516 + row_width table.column_count) 517 + () 518 + else if row_width < table.column_count then 519 + Message_collector.add_error collector 520 + ~message: 521 + (Printf.sprintf 522 + {|A table row was %d columns wide, which is less than the column count established using column markup (%d).|} 523 + row_width table.column_count) 524 + ()) 525 + else if table.column_count = -1 then 526 + table.column_count <- row_width 527 + else ( 528 + if row_width > table.column_count then 529 + Message_collector.add_warning collector 530 + ~message: 531 + (Printf.sprintf 532 + {|A table row was %d columns wide and exceeded the column count established by the first row (%d).|} 533 + row_width table.column_count) 534 + () 535 + else if row_width < table.column_count then 536 + Message_collector.add_warning collector 537 + ~message: 538 + (Printf.sprintf 539 + {|A table row was %d columns wide, which is less than the column count established by the first row (%d).|} 540 + row_width table.column_count) 541 + ()) 542 + | None -> failwith "Bug: InRowInRowGroup but no row group") 543 + | InRowInImplicitRowGroup -> 544 + table.state <- InImplicitRowGroup; 545 + (match table.current_row_group with 546 + | Some group -> 547 + let row_width = end_row_in_group group collector in 548 + (* Same column count checking as above *) 549 + if table.hard_width then ( 550 + if row_width > table.column_count then 551 + Message_collector.add_error collector 552 + ~message: 553 + (Printf.sprintf 554 + {|A table row was %d columns wide and exceeded the column count established using column markup (%d).|} 555 + row_width table.column_count) 556 + () 557 + else if row_width < table.column_count then 558 + Message_collector.add_error collector 559 + ~message: 560 + (Printf.sprintf 561 + {|A table row was %d columns wide, which is less than the column count established using column markup (%d).|} 562 + row_width table.column_count) 563 + ()) 564 + else if table.column_count = -1 then 565 + table.column_count <- row_width 566 + else ( 567 + if row_width > table.column_count then 568 + Message_collector.add_warning collector 569 + ~message: 570 + (Printf.sprintf 571 + {|A table row was %d columns wide and exceeded the column count established by the first row (%d).|} 572 + row_width table.column_count) 573 + () 574 + else if row_width < table.column_count then 575 + Message_collector.add_warning collector 576 + ~message: 577 + (Printf.sprintf 578 + {|A table row was %d columns wide, which is less than the column count established by the first row (%d).|} 579 + row_width table.column_count) 580 + ()) 581 + | None -> failwith "Bug: InRowInImplicitRowGroup but no row group") 582 + | _ -> failwith "Bug: end_row in wrong state" 583 + 584 + (** Start a cell *) 585 + let start_cell table is_header attrs collector = 586 + if need_suppress_start table then () 587 + else 588 + match table.state with 589 + | InRowInRowGroup -> 590 + table.state <- InCellInRowGroup; 591 + (* Record header ID if present *) 592 + if is_header then ( 593 + match List.assoc_opt "id" attrs with 594 + | Some id when String.length id > 0 -> Hashtbl.replace table.header_ids id () 595 + | _ -> ()); 596 + (* Parse cell attributes *) 597 + let colspan = abs (parse_positive_int attrs "colspan") in 598 + let rowspan = abs (parse_non_negative_int attrs "rowspan") in 599 + let headers = parse_headers attrs in 600 + let cell = make_cell ~colspan ~rowspan ~headers ~is_header collector in 601 + if List.length headers > 0 then table.cells_with_headers := cell :: !(table.cells_with_headers); 602 + (match table.current_row_group with 603 + | Some group -> 604 + add_cell_to_group group cell collector; 605 + report_cell_to_table table cell 606 + | None -> failwith "Bug: InRowInRowGroup but no row group") 607 + | InRowInImplicitRowGroup -> 608 + table.state <- InCellInImplicitRowGroup; 609 + (* Same logic as above *) 610 + if is_header then ( 611 + match List.assoc_opt "id" attrs with 612 + | Some id when String.length id > 0 -> Hashtbl.replace table.header_ids id () 613 + | _ -> ()); 614 + let colspan = abs (parse_positive_int attrs "colspan") in 615 + let rowspan = abs (parse_non_negative_int attrs "rowspan") in 616 + let headers = parse_headers attrs in 617 + let cell = make_cell ~colspan ~rowspan ~headers ~is_header collector in 618 + if List.length headers > 0 then table.cells_with_headers := cell :: !(table.cells_with_headers); 619 + (match table.current_row_group with 620 + | Some group -> 621 + add_cell_to_group group cell collector; 622 + report_cell_to_table table cell 623 + | None -> failwith "Bug: InRowInImplicitRowGroup but no row group") 624 + | _ -> table.suppressed_starts <- 1 625 + 626 + (** End a cell *) 627 + let end_cell table = 628 + if need_suppress_end table then () 629 + else 630 + match table.state with 631 + | InCellInRowGroup -> table.state <- InRowInRowGroup 632 + | InCellInImplicitRowGroup -> table.state <- InRowInImplicitRowGroup 633 + | _ -> failwith "Bug: end_cell in wrong state" 634 + 635 + (** Start a colgroup *) 636 + let start_colgroup table attrs collector = 637 + if need_suppress_start table then () 638 + else 639 + match table.state with 640 + | InTableAtStart -> 641 + table.hard_width <- true; 642 + table.column_count <- 0; 643 + table.pending_colgroup_span <- parse_span attrs collector; 644 + table.state <- InColgroup 645 + | InTableColsSeen -> 646 + table.pending_colgroup_span <- parse_span attrs collector; 647 + table.state <- InColgroup 648 + | _ -> table.suppressed_starts <- 1 649 + 650 + (** End a colgroup *) 651 + let end_colgroup table = 652 + if need_suppress_end table then () 653 + else 654 + match table.state with 655 + | InColgroup -> 656 + if table.pending_colgroup_span <> 0 then ( 657 + let right = table.column_count + abs table.pending_colgroup_span in 658 + let range = make_column_range ~element:"colgroup" ~left:table.column_count ~right in 659 + append_column_range table range; 660 + table.column_count <- right); 661 + table.real_column_count <- table.column_count; 662 + table.state <- InTableColsSeen 663 + | _ -> failwith "Bug: end_colgroup in wrong state" 664 + 665 + (** Start a col *) 666 + let start_col table attrs collector = 667 + if need_suppress_start table then () 668 + else 669 + match table.state with 670 + | InTableAtStart -> 671 + table.hard_width <- true; 672 + table.column_count <- 0; 673 + table.state <- InColInImplicitGroup; 674 + let span = abs (parse_span attrs collector) in 675 + let right = table.column_count + span in 676 + let range = make_column_range ~element:"col" ~left:table.column_count ~right in 677 + append_column_range table range; 678 + table.column_count <- right; 679 + table.real_column_count <- table.column_count 680 + | InTableColsSeen -> 681 + table.state <- InColInImplicitGroup; 682 + let span = abs (parse_span attrs collector) in 683 + let right = table.column_count + span in 684 + let range = make_column_range ~element:"col" ~left:table.column_count ~right in 685 + append_column_range table range; 686 + table.column_count <- right; 687 + table.real_column_count <- table.column_count 688 + | InColgroup -> 689 + if table.pending_colgroup_span > 0 then 690 + Message_collector.add_warning collector 691 + ~message: 692 + (Printf.sprintf 693 + "A col element causes a span attribute with value %d to be ignored on the \ 694 + parent colgroup." 695 + table.pending_colgroup_span) 696 + (); 697 + table.pending_colgroup_span <- 0; 698 + table.state <- InColInColgroup; 699 + let span = abs (parse_span attrs collector) in 700 + let right = table.column_count + span in 701 + let range = make_column_range ~element:"col" ~left:table.column_count ~right in 702 + append_column_range table range; 703 + table.column_count <- right; 704 + table.real_column_count <- table.column_count 705 + | _ -> table.suppressed_starts <- 1 706 + 707 + (** End a col *) 708 + let end_col table = 709 + if need_suppress_end table then () 710 + else 711 + match table.state with 712 + | InColInImplicitGroup -> table.state <- InTableColsSeen 713 + | InColInColgroup -> table.state <- InColgroup 714 + | _ -> failwith "Bug: end_col in wrong state" 715 + 716 + (** End a table *) 717 + let end_table table collector = 718 + (match table.state with 719 + | InImplicitRowGroup -> ( 720 + match table.current_row_group with 721 + | Some group -> 722 + end_row_group group collector; 723 + table.current_row_group <- None 724 + | None -> failwith "Bug: InImplicitRowGroup but no row group") 725 + | InTableAtStart | InTableAtPotentialRowGroupStart | InTableColsSeen -> () 726 + | _ -> failwith "Bug: end_table in wrong state"); 727 + 728 + (* Check header reference integrity *) 729 + List.iter 730 + (fun cell -> 731 + List.iter 732 + (fun heading -> 733 + if not (Hashtbl.mem table.header_ids heading) then 734 + Message_collector.add_error collector 735 + ~message: 736 + (Printf.sprintf 737 + {|The "headers" attribute on the element "%s" refers to the ID "%s", but there is no "th" element with that ID in the same table.|} 738 + cell.element_name heading) 739 + ()) 740 + cell.headers) 741 + !(table.cells_with_headers); 742 + 743 + (* Check that each column established by col/colgroup has cells *) 744 + let rec check_ranges range = 745 + match range with 746 + | None -> () 747 + | Some r -> 748 + if is_single_col r then 749 + Message_collector.add_error collector 750 + ~message: 751 + (Printf.sprintf {|Table column %d established by element "%s" has no cells beginning in it.|} 752 + r.right r.element) 753 + () 754 + else 755 + Message_collector.add_error collector 756 + ~message: 757 + (Printf.sprintf 758 + {|Table columns in range %d…%d established by element "%s" have no cells beginning in them.|} 759 + (r.left + 1) r.right r.element) 760 + (); 761 + check_ranges r.next 762 + in 763 + check_ranges table.first_col_range 764 + 765 + (** {1 Checker State} *) 766 + 767 + type state = { tables : table list ref (* Stack of nested tables *) } 768 + 769 + let create () = { tables = ref [] } 770 + 771 + let reset state = state.tables := [] 772 + 773 + let start_element state ~name ~namespace ~attrs collector = 774 + match namespace with 775 + | Some ns when ns = html_ns -> ( 776 + match name with 777 + | "table" -> 778 + (* Push a new table onto the stack *) 779 + state.tables := make_table () :: !(state.tables) 780 + | _ -> ( 781 + match !(state.tables) with 782 + | [] -> () 783 + | table :: _ -> ( 784 + match name with 785 + | "td" -> start_cell table false attrs collector 786 + | "th" -> start_cell table true attrs collector 787 + | "tr" -> start_row table collector 788 + | "tbody" | "thead" | "tfoot" -> start_row_group table name collector 789 + | "col" -> start_col table attrs collector 790 + | "colgroup" -> start_colgroup table attrs collector 791 + | _ -> ()))) 792 + | _ -> () 793 + 794 + let end_element state ~name ~namespace collector = 795 + match namespace with 796 + | Some ns when ns = html_ns -> ( 797 + match name with 798 + | "table" -> ( 799 + match !(state.tables) with 800 + | [] -> failwith "Bug: end table but no table on stack" 801 + | table :: rest -> 802 + end_table table collector; 803 + state.tables := rest) 804 + | _ -> ( 805 + match !(state.tables) with 806 + | [] -> () 807 + | table :: _ -> ( 808 + match name with 809 + | "td" | "th" -> end_cell table 810 + | "tr" -> end_row table collector 811 + | "tbody" | "thead" | "tfoot" -> end_row_group_handler table collector 812 + | "col" -> end_col table 813 + | "colgroup" -> end_colgroup table 814 + | _ -> ()))) 815 + | _ -> () 816 + 817 + let characters _state _text _collector = () 818 + 819 + let end_document state collector = 820 + if !(state.tables) <> [] then 821 + Message_collector.add_error collector ~message:"Unclosed table element at end of document." () 822 + 823 + let checker = 824 + (module struct 825 + type nonrec state = state 826 + 827 + let create = create 828 + let reset = reset 829 + let start_element = start_element 830 + let end_element = end_element 831 + let characters = characters 832 + let end_document = end_document 833 + end : Checker.S)
+83
lib/html5_checker/specialized/table_checker.mli
··· 1 + (** Table structure validation checker. 2 + 3 + Validates HTML table integrity including: 4 + - Cell overlap detection (rowspan/colspan causing overlap) 5 + - Spanning past table boundaries 6 + - Proper table structure (thead/tbody/tfoot ordering) 7 + - Maximum colspan limit (1000 per HTML spec) 8 + - Maximum rowspan limit (65534 per HTML spec) 9 + 10 + {2 Validation Rules} 11 + 12 + {b Cell Positioning} 13 + - Detects when two cells claim the same grid position 14 + - Validates that rowspan/colspan don't cause cells to extend past boundaries 15 + - Tracks cell positions accounting for rowspan/colspan from previous rows 16 + 17 + {b Span Limits} 18 + - [colspan] must be positive and <= 1000 19 + - [rowspan] must be non-negative and <= 65534 20 + - [rowspan=0] is a special value meaning "span to end of row group" 21 + 22 + {b Table Structure} 23 + - [caption] must be first child of [table] if present 24 + - [thead] must come before [tbody] and [tfoot] 25 + - Only one [thead] and one [tfoot] allowed per table 26 + - [col] elements can establish explicit column count 27 + - [colgroup] elements can group columns 28 + 29 + {b Row Validation} 30 + - Each row must have at least one cell 31 + - Row widths should match the established column count 32 + - Cells cannot overlap horizontally in the same row 33 + 34 + {b Header References} 35 + - [headers] attribute on [td]/[th] must reference valid [th] IDs 36 + - All referenced header IDs must exist in the same table 37 + 38 + {3 Example Valid Table} 39 + 40 + {v 41 + <table> 42 + <thead> 43 + <tr> 44 + <th id="h1">Header 1</th> 45 + <th id="h2">Header 2</th> 46 + </tr> 47 + </thead> 48 + <tbody> 49 + <tr> 50 + <td headers="h1">Cell 1</td> 51 + <td headers="h2">Cell 2</td> 52 + </tr> 53 + </tbody> 54 + </table> 55 + v} 56 + 57 + {3 Example Invalid Table (Overlapping Cells)} 58 + 59 + {v 60 + <table> 61 + <tr> 62 + <td rowspan="2">A</td> 63 + <td>B</td> 64 + </tr> 65 + <tr> 66 + <td>C</td> <!-- Would overlap with A's rowspan --> 67 + </tr> 68 + </table> 69 + v} 70 + 71 + @see <https://html.spec.whatwg.org/multipage/tables.html> WHATWG HTML: Tables 72 + @see <https://www.w3.org/TR/html52/tabular-data.html> W3C HTML 5.2: Tables *) 73 + 74 + include Checker.S 75 + 76 + val checker : Checker.t 77 + (** A first-class module instance of this checker. 78 + 79 + {b Usage:} 80 + {[ 81 + let checker = Table_checker.checker in 82 + Checker_registry.register "table-structure" checker 83 + ]} *)
+2 -1
lib/html5rw/parser/parser_impl.ml
··· 25 25 module TreeBuilderSink = struct 26 26 type t = Parser_tree_builder.t 27 27 28 - let process tb token = 28 + let process tb token ~line ~column = 29 + Parser_tree_builder.set_position tb ~line ~column; 29 30 Parser_tree_builder.process_token tb token; 30 31 (* Check if we need to switch tokenizer state based on current element *) 31 32 (* Only switch for HTML namespace elements - SVG/MathML use different rules *)
+10 -1
lib/html5rw/parser/parser_tree_builder.ml
··· 42 42 fragment_context : fragment_context option; 43 43 mutable fragment_context_element : Dom.node option; 44 44 iframe_srcdoc : bool; 45 + mutable current_line : int; 46 + mutable current_column : int; 45 47 } 46 48 47 49 let create ?(collect_errors=false) ?fragment_context ?(iframe_srcdoc=false) () = ··· 66 68 fragment_context; 67 69 fragment_context_element = None; 68 70 iframe_srcdoc; 71 + current_line = 1; 72 + current_column = 1; 69 73 } in 70 74 (* Initialize fragment parsing *) 71 75 (match fragment_context with ··· 110 114 | None -> ()); 111 115 t 112 116 117 + (* Position tracking for error reporting *) 118 + let set_position t ~line ~column = 119 + t.current_line <- line; 120 + t.current_column <- column 121 + 113 122 (* Error handling *) 114 123 let parse_error t code = 115 124 if t.collect_errors then 116 - t.errors <- { code = Parse_error_code.of_string code; line = 0; column = 0 } :: t.errors 125 + t.errors <- { code = Parse_error_code.of_string code; line = t.current_line; column = t.current_column } :: t.errors 117 126 118 127 (* Stack helpers *) 119 128 let current_node t =
+1 -1
lib/html5rw/tokenizer/tokenizer.mli
··· 194 194 *) 195 195 module type SINK = sig 196 196 type t 197 - val process : t -> Tokenizer_token.t -> [ `Continue | `SwitchTo of Tokenizer_state.t ] 197 + val process : t -> Tokenizer_token.t -> line:int -> column:int -> [ `Continue | `SwitchTo of Tokenizer_state.t ] 198 198 val adjusted_current_node_in_html_namespace : t -> bool 199 199 end 200 200
+41 -38
lib/html5rw/tokenizer/tokenizer_impl.ml
··· 11 11 (* Token sink interface *) 12 12 module type SINK = sig 13 13 type t 14 - val process : t -> Tokenizer_token.t -> [ `Continue | `SwitchTo of Tokenizer_state.t ] 14 + val process : t -> Tokenizer_token.t -> line:int -> column:int -> [ `Continue | `SwitchTo of Tokenizer_state.t ] 15 15 val adjusted_current_node_in_html_namespace : t -> bool 16 16 end 17 17 ··· 184 184 let data = Buffer.contents t.pending_chars in 185 185 Buffer.clear t.pending_chars; 186 186 let data = if t.xml_mode then transform_xml_chars data else data in 187 - ignore (S.process t.sink (Tokenizer_token.Character data)) 187 + let line, column = Tokenizer_stream.position t.stream in 188 + ignore (S.process t.sink (Tokenizer_token.Character data) ~line ~column) 188 189 end 189 190 in 190 191 191 192 let emit token = 192 193 emit_pending_chars (); 193 - match S.process t.sink token with 194 + let line, column = Tokenizer_stream.position t.stream in 195 + match S.process t.sink token ~line ~column with 194 196 | `Continue -> () 195 197 | `SwitchTo new_state -> t.state <- new_state 196 198 in ··· 278 280 handle_eof () 279 281 end else if Tokenizer_stream.is_eof t.stream then begin 280 282 emit_pending_chars (); 281 - ignore (S.process t.sink Tokenizer_token.EOF) 283 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 282 284 end else begin 283 285 step (); 284 286 process_state () ··· 288 290 match t.state with 289 291 | Tokenizer_state.Data -> 290 292 emit_pending_chars (); 291 - ignore (S.process t.sink Tokenizer_token.EOF) 293 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 292 294 | Tokenizer_state.Tag_open -> 293 295 error t "eof-before-tag-name"; 294 296 emit_char t '<'; 295 297 emit_pending_chars (); 296 - ignore (S.process t.sink Tokenizer_token.EOF) 298 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 297 299 | Tokenizer_state.End_tag_open -> 298 300 error t "eof-before-tag-name"; 299 301 emit_str t "</"; 300 302 emit_pending_chars (); 301 - ignore (S.process t.sink Tokenizer_token.EOF) 303 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 302 304 | Tokenizer_state.Tag_name 303 305 | Tokenizer_state.Before_attribute_name 304 306 | Tokenizer_state.Attribute_name ··· 311 313 | Tokenizer_state.Self_closing_start_tag -> 312 314 error t "eof-in-tag"; 313 315 emit_pending_chars (); 314 - ignore (S.process t.sink Tokenizer_token.EOF) 316 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 315 317 | Tokenizer_state.Rawtext -> 316 318 emit_pending_chars (); 317 - ignore (S.process t.sink Tokenizer_token.EOF) 319 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 318 320 | Tokenizer_state.Rawtext_less_than_sign -> 319 321 emit_char t '<'; 320 322 emit_pending_chars (); 321 - ignore (S.process t.sink Tokenizer_token.EOF) 323 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 322 324 | Tokenizer_state.Rawtext_end_tag_open -> 323 325 emit_str t "</"; 324 326 emit_pending_chars (); 325 - ignore (S.process t.sink Tokenizer_token.EOF) 327 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 326 328 | Tokenizer_state.Rawtext_end_tag_name -> 327 329 emit_str t "</"; 328 330 emit_str t (Buffer.contents t.temp_buffer); 329 331 emit_pending_chars (); 330 - ignore (S.process t.sink Tokenizer_token.EOF) 332 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 331 333 | Tokenizer_state.Rcdata -> 332 334 emit_pending_chars (); 333 - ignore (S.process t.sink Tokenizer_token.EOF) 335 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 334 336 | Tokenizer_state.Rcdata_less_than_sign -> 335 337 emit_char t '<'; 336 338 emit_pending_chars (); 337 - ignore (S.process t.sink Tokenizer_token.EOF) 339 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 338 340 | Tokenizer_state.Rcdata_end_tag_open -> 339 341 emit_str t "</"; 340 342 emit_pending_chars (); 341 - ignore (S.process t.sink Tokenizer_token.EOF) 343 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 342 344 | Tokenizer_state.Rcdata_end_tag_name -> 343 345 emit_str t "</"; 344 346 emit_str t (Buffer.contents t.temp_buffer); 345 347 emit_pending_chars (); 346 - ignore (S.process t.sink Tokenizer_token.EOF) 348 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 347 349 | Tokenizer_state.Script_data -> 348 350 emit_pending_chars (); 349 - ignore (S.process t.sink Tokenizer_token.EOF) 351 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 350 352 | Tokenizer_state.Script_data_less_than_sign -> 351 353 emit_char t '<'; 352 354 emit_pending_chars (); 353 - ignore (S.process t.sink Tokenizer_token.EOF) 355 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 354 356 | Tokenizer_state.Script_data_end_tag_open -> 355 357 emit_str t "</"; 356 358 emit_pending_chars (); 357 - ignore (S.process t.sink Tokenizer_token.EOF) 359 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 358 360 | Tokenizer_state.Script_data_end_tag_name -> 359 361 emit_str t "</"; 360 362 emit_str t (Buffer.contents t.temp_buffer); 361 363 emit_pending_chars (); 362 - ignore (S.process t.sink Tokenizer_token.EOF) 364 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 363 365 | Tokenizer_state.Script_data_escape_start 364 366 | Tokenizer_state.Script_data_escape_start_dash 365 367 | Tokenizer_state.Script_data_escaped ··· 367 369 | Tokenizer_state.Script_data_escaped_dash_dash -> 368 370 error t "eof-in-script-html-comment-like-text"; 369 371 emit_pending_chars (); 370 - ignore (S.process t.sink Tokenizer_token.EOF) 372 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 371 373 | Tokenizer_state.Script_data_escaped_less_than_sign -> 372 374 emit_char t '<'; 373 375 emit_pending_chars (); 374 - ignore (S.process t.sink Tokenizer_token.EOF) 376 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 375 377 | Tokenizer_state.Script_data_escaped_end_tag_open -> 376 378 emit_str t "</"; 377 379 emit_pending_chars (); 378 - ignore (S.process t.sink Tokenizer_token.EOF) 380 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 379 381 | Tokenizer_state.Script_data_escaped_end_tag_name -> 380 382 emit_str t "</"; 381 383 emit_str t (Buffer.contents t.temp_buffer); 382 384 emit_pending_chars (); 383 - ignore (S.process t.sink Tokenizer_token.EOF) 385 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 384 386 | Tokenizer_state.Script_data_double_escape_start 385 387 | Tokenizer_state.Script_data_double_escaped 386 388 | Tokenizer_state.Script_data_double_escaped_dash 387 389 | Tokenizer_state.Script_data_double_escaped_dash_dash -> 388 390 error t "eof-in-script-html-comment-like-text"; 389 391 emit_pending_chars (); 390 - ignore (S.process t.sink Tokenizer_token.EOF) 392 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 391 393 | Tokenizer_state.Script_data_double_escaped_less_than_sign -> 392 394 (* '<' was already emitted when entering this state from Script_data_double_escaped *) 393 395 emit_pending_chars (); 394 - ignore (S.process t.sink Tokenizer_token.EOF) 396 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 395 397 | Tokenizer_state.Script_data_double_escape_end -> 396 398 emit_pending_chars (); 397 - ignore (S.process t.sink Tokenizer_token.EOF) 399 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 398 400 | Tokenizer_state.Plaintext -> 399 401 emit_pending_chars (); 400 - ignore (S.process t.sink Tokenizer_token.EOF) 402 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 401 403 | Tokenizer_state.Comment_start 402 404 | Tokenizer_state.Comment_start_dash 403 405 | Tokenizer_state.Comment ··· 411 413 error t "eof-in-comment"; 412 414 emit_current_comment (); 413 415 emit_pending_chars (); 414 - ignore (S.process t.sink Tokenizer_token.EOF) 416 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 415 417 | Tokenizer_state.Bogus_comment -> 416 418 emit_current_comment (); 417 419 emit_pending_chars (); 418 - ignore (S.process t.sink Tokenizer_token.EOF) 420 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 419 421 | Tokenizer_state.Markup_declaration_open -> 420 422 error t "incorrectly-opened-comment"; 421 423 Buffer.clear t.current_comment; 422 424 emit_current_comment (); 423 425 emit_pending_chars (); 424 - ignore (S.process t.sink Tokenizer_token.EOF) 426 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 425 427 | Tokenizer_state.Doctype 426 428 | Tokenizer_state.Before_doctype_name -> 427 429 error t "eof-in-doctype"; ··· 429 431 t.current_doctype_force_quirks <- true; 430 432 emit_current_doctype (); 431 433 emit_pending_chars (); 432 - ignore (S.process t.sink Tokenizer_token.EOF) 434 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 433 435 | Tokenizer_state.Doctype_name 434 436 | Tokenizer_state.After_doctype_name 435 437 | Tokenizer_state.After_doctype_public_keyword ··· 447 449 t.current_doctype_force_quirks <- true; 448 450 emit_current_doctype (); 449 451 emit_pending_chars (); 450 - ignore (S.process t.sink Tokenizer_token.EOF) 452 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 451 453 | Tokenizer_state.Bogus_doctype -> 452 454 emit_current_doctype (); 453 455 emit_pending_chars (); 454 - ignore (S.process t.sink Tokenizer_token.EOF) 456 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 455 457 | Tokenizer_state.Cdata_section -> 456 458 error t "eof-in-cdata"; 457 459 emit_pending_chars (); 458 - ignore (S.process t.sink Tokenizer_token.EOF) 460 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 459 461 | Tokenizer_state.Cdata_section_bracket -> 460 462 error t "eof-in-cdata"; 461 463 emit_char t ']'; 462 464 emit_pending_chars (); 463 - ignore (S.process t.sink Tokenizer_token.EOF) 465 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 464 466 | Tokenizer_state.Cdata_section_end -> 465 467 error t "eof-in-cdata"; 466 468 emit_str t "]]"; 467 469 emit_pending_chars (); 468 - ignore (S.process t.sink Tokenizer_token.EOF) 470 + let line, column = Tokenizer_stream.position t.stream in ignore (S.process t.sink Tokenizer_token.EOF ~line ~column) 469 471 | Tokenizer_state.Character_reference -> 470 472 (* state_character_reference never ran, so initialize temp_buffer with & *) 471 473 Buffer.clear t.temp_buffer; ··· 617 619 (* Emit pending chars first, then emit null separately for proper tree builder handling *) 618 620 emit_pending_chars (); 619 621 error t "unexpected-null-character"; 620 - ignore (S.process t.sink (Tokenizer_token.Character "\x00")) 622 + let line, column = Tokenizer_stream.position t.stream in 623 + ignore (S.process t.sink (Tokenizer_token.Character "\x00") ~line ~column) 621 624 | Some c -> 622 625 emit_char_checked c 623 626 | None -> ()
+15
test/dune
··· 54 54 (glob_files ../html5lib-tests/serializer/*.test)) 55 55 (action 56 56 (run %{exe:test_serializer.exe} ../html5lib-tests/serializer))) 57 + 58 + (executable 59 + (name test_nesting_checker) 60 + (modules test_nesting_checker) 61 + (libraries html5rw.checker)) 62 + 63 + (executable 64 + (name test_html5_checker) 65 + (modules test_html5_checker) 66 + (libraries bytesrw html5rw html5rw.checker str)) 67 + 68 + (rule 69 + (alias runtest) 70 + (action 71 + (run %{exe:test_html5_checker.exe})))
+298
test/test_html5_checker.ml
··· 1 + (** Tests for the html5_checker library *) 2 + 3 + (** Helper to create a reader from a string *) 4 + let reader_of_string s = Bytesrw.Bytes.Reader.of_string s 5 + 6 + (** Helper to check if a message contains a substring *) 7 + let message_contains msg substring = 8 + String.lowercase_ascii msg.Html5_checker.Message.message 9 + |> fun s -> String.length s >= String.length substring && 10 + try 11 + ignore (Str.search_forward (Str.regexp_case_fold (Str.quote substring)) s 0); 12 + true 13 + with Not_found -> false 14 + 15 + (** Test that valid HTML5 produces no errors *) 16 + let test_valid_html5 () = 17 + Printf.printf "Test 1: Valid HTML5 document\n"; 18 + let html = {|<!DOCTYPE html> 19 + <html lang="en"> 20 + <head><title>Test</title></head> 21 + <body><p>Hello world</p></body> 22 + </html>|} in 23 + let reader = reader_of_string html in 24 + let result = Html5_checker.check reader in 25 + let errors = Html5_checker.errors result in 26 + Printf.printf " Found %d error(s)\n" (List.length errors); 27 + if List.length errors > 0 then begin 28 + List.iter (fun msg -> 29 + Printf.printf " - %s\n" msg.Html5_checker.Message.message 30 + ) errors; 31 + end else 32 + Printf.printf " OK: No errors as expected\n" 33 + 34 + (** Test that missing DOCTYPE is detected *) 35 + let test_missing_doctype () = 36 + Printf.printf "\nTest 2: Missing DOCTYPE\n"; 37 + let html = "<html><body>Hello</body></html>" in 38 + let reader = reader_of_string html in 39 + let result = Html5_checker.check reader in 40 + let errors = Html5_checker.errors result in 41 + Printf.printf " Found %d error(s)\n" (List.length errors); 42 + if List.length errors = 0 then 43 + Printf.printf " Warning: Expected parse errors for missing DOCTYPE\n" 44 + else begin 45 + List.iter (fun msg -> 46 + Printf.printf " - %s\n" msg.Html5_checker.Message.message 47 + ) errors; 48 + end 49 + 50 + (** Test that obsolete elements are detected *) 51 + let test_obsolete_element () = 52 + Printf.printf "\nTest 3: Obsolete <center> element\n"; 53 + let html = "<!DOCTYPE html><html><body><center>Centered</center></body></html>" in 54 + let reader = reader_of_string html in 55 + let result = Html5_checker.check reader in 56 + let all_msgs = Html5_checker.messages result in 57 + Printf.printf " Found %d message(s)\n" (List.length all_msgs); 58 + let obsolete_msgs = List.filter (fun m -> 59 + message_contains m "obsolete" || message_contains m "center" 60 + ) all_msgs in 61 + if List.length obsolete_msgs > 0 then begin 62 + Printf.printf " Found obsolete-related messages:\n"; 63 + List.iter (fun msg -> 64 + Printf.printf " - %s\n" msg.Html5_checker.Message.message 65 + ) obsolete_msgs; 66 + end else 67 + Printf.printf " Note: No obsolete element warnings found (checker may not be enabled)\n" 68 + 69 + (** Test duplicate IDs *) 70 + let test_duplicate_id () = 71 + Printf.printf "\nTest 4: Duplicate ID attributes\n"; 72 + let html = {|<!DOCTYPE html><html><body> 73 + <div id="foo">First</div> 74 + <div id="foo">Second</div> 75 + </body></html>|} in 76 + let reader = reader_of_string html in 77 + let result = Html5_checker.check reader in 78 + let all_msgs = Html5_checker.messages result in 79 + Printf.printf " Found %d message(s)\n" (List.length all_msgs); 80 + let id_msgs = List.filter (fun m -> 81 + message_contains m "duplicate" || message_contains m "id" 82 + ) all_msgs in 83 + if List.length id_msgs > 0 then begin 84 + Printf.printf " Found ID-related messages:\n"; 85 + List.iter (fun msg -> 86 + Printf.printf " - %s\n" msg.Html5_checker.Message.message 87 + ) id_msgs; 88 + end else 89 + Printf.printf " Note: No duplicate ID errors found (checker may not be enabled)\n" 90 + 91 + (** Test heading structure *) 92 + let test_heading_skip () = 93 + Printf.printf "\nTest 5: Skipped heading level\n"; 94 + let html = {|<!DOCTYPE html><html><body> 95 + <h1>Title</h1> 96 + <h3>Skipped h2</h3> 97 + </body></html>|} in 98 + let reader = reader_of_string html in 99 + let result = Html5_checker.check reader in 100 + let all_msgs = Html5_checker.messages result in 101 + Printf.printf " Found %d message(s)\n" (List.length all_msgs); 102 + let heading_msgs = List.filter (fun m -> 103 + message_contains m "heading" || message_contains m "skip" 104 + ) all_msgs in 105 + if List.length heading_msgs > 0 then begin 106 + Printf.printf " Found heading-related messages:\n"; 107 + List.iter (fun msg -> 108 + Printf.printf " - %s\n" msg.Html5_checker.Message.message 109 + ) heading_msgs; 110 + end else 111 + Printf.printf " Note: No heading structure warnings found (checker may not be enabled)\n" 112 + 113 + (** Test img without alt *) 114 + let test_img_without_alt () = 115 + Printf.printf "\nTest 6: Image without alt attribute\n"; 116 + let html = {|<!DOCTYPE html><html><body> 117 + <img src="test.jpg"> 118 + </body></html>|} in 119 + let reader = reader_of_string html in 120 + let result = Html5_checker.check reader in 121 + let all_msgs = Html5_checker.messages result in 122 + Printf.printf " Found %d message(s)\n" (List.length all_msgs); 123 + let img_msgs = List.filter (fun m -> 124 + message_contains m "alt" || (message_contains m "img" && message_contains m "attribute") 125 + ) all_msgs in 126 + if List.length img_msgs > 0 then begin 127 + Printf.printf " Found img/alt-related messages:\n"; 128 + List.iter (fun msg -> 129 + Printf.printf " - %s\n" msg.Html5_checker.Message.message 130 + ) img_msgs; 131 + end else 132 + Printf.printf " Note: No missing alt attribute errors found (checker may not be enabled)\n" 133 + 134 + (** Test invalid nesting *) 135 + let test_invalid_nesting () = 136 + Printf.printf "\nTest 7: Invalid nesting - <a> inside <a>\n"; 137 + let html = {|<!DOCTYPE html><html><body> 138 + <a href="#">Link <a href="#">Nested</a></a> 139 + </body></html>|} in 140 + let reader = reader_of_string html in 141 + let result = Html5_checker.check reader in 142 + let all_msgs = Html5_checker.messages result in 143 + Printf.printf " Found %d message(s)\n" (List.length all_msgs); 144 + let nesting_msgs = List.filter (fun m -> 145 + message_contains m "nesting" || message_contains m "nested" || message_contains m "ancestor" 146 + ) all_msgs in 147 + if List.length nesting_msgs > 0 then begin 148 + Printf.printf " Found nesting-related messages:\n"; 149 + List.iter (fun msg -> 150 + Printf.printf " - %s\n" msg.Html5_checker.Message.message 151 + ) nesting_msgs; 152 + end else 153 + Printf.printf " Note: No nesting errors found (checker may not be enabled)\n" 154 + 155 + (** Test form inside form *) 156 + let test_form_nesting () = 157 + Printf.printf "\nTest 8: Invalid nesting - <form> inside <form>\n"; 158 + let html = {|<!DOCTYPE html><html><body> 159 + <form><form></form></form> 160 + </body></html>|} in 161 + let reader = reader_of_string html in 162 + let result = Html5_checker.check reader in 163 + let all_msgs = Html5_checker.messages result in 164 + Printf.printf " Found %d message(s)\n" (List.length all_msgs); 165 + let form_msgs = List.filter (fun m -> 166 + message_contains m "form" 167 + ) all_msgs in 168 + if List.length form_msgs > 0 then begin 169 + Printf.printf " Found form-related messages:\n"; 170 + List.iter (fun msg -> 171 + Printf.printf " - %s\n" msg.Html5_checker.Message.message 172 + ) form_msgs; 173 + end else 174 + Printf.printf " Note: No form nesting errors found (checker may not be enabled)\n" 175 + 176 + (** Test output formatting *) 177 + let test_output_formats () = 178 + Printf.printf "\nTest 9: Output format testing\n"; 179 + let html = {|<!DOCTYPE html><html><body><p>Test</p></body></html>|} in 180 + let reader = reader_of_string html in 181 + let result = Html5_checker.check reader in 182 + 183 + Printf.printf " Testing text format:\n"; 184 + let text_output = Html5_checker.format_text result in 185 + Printf.printf " Length: %d chars\n" (String.length text_output); 186 + 187 + Printf.printf " Testing JSON format:\n"; 188 + let json_output = Html5_checker.format_json result in 189 + Printf.printf " Length: %d chars\n" (String.length json_output); 190 + 191 + Printf.printf " Testing GNU format:\n"; 192 + let gnu_output = Html5_checker.format_gnu result in 193 + Printf.printf " Length: %d chars\n" (String.length gnu_output) 194 + 195 + (** Test has_errors function *) 196 + let test_has_errors () = 197 + Printf.printf "\nTest 10: has_errors function\n"; 198 + 199 + (* Valid document should have no errors *) 200 + let valid_html = "<!DOCTYPE html><html><body><p>Valid</p></body></html>" in 201 + let result1 = Html5_checker.check (reader_of_string valid_html) in 202 + Printf.printf " Valid document has_errors: %b\n" (Html5_checker.has_errors result1); 203 + 204 + (* Document with likely parse errors *) 205 + let invalid_html = "<html><body><p>Unclosed" in 206 + let result2 = Html5_checker.check (reader_of_string invalid_html) in 207 + Printf.printf " Invalid document has_errors: %b\n" (Html5_checker.has_errors result2) 208 + 209 + (** Test check_dom with pre-parsed document *) 210 + let test_check_dom () = 211 + Printf.printf "\nTest 11: check_dom with pre-parsed document\n"; 212 + let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in 213 + let reader = reader_of_string html in 214 + let parsed = Html5rw.parse reader in 215 + let result = Html5_checker.check_dom parsed in 216 + let all_msgs = Html5_checker.messages result in 217 + Printf.printf " check_dom found %d message(s)\n" (List.length all_msgs); 218 + Printf.printf " OK: check_dom completed successfully\n" 219 + 220 + (** Test system_id parameter *) 221 + let test_system_id () = 222 + Printf.printf "\nTest 12: system_id parameter\n"; 223 + let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in 224 + let reader = reader_of_string html in 225 + let result = Html5_checker.check ~system_id:"test.html" reader in 226 + match Html5_checker.system_id result with 227 + | Some id -> Printf.printf " system_id: %s\n" id 228 + | None -> Printf.printf " Warning: system_id not set\n" 229 + 230 + (** Test collect_parse_errors flag *) 231 + let test_collect_parse_errors_flag () = 232 + Printf.printf "\nTest 13: collect_parse_errors flag\n"; 233 + let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in 234 + 235 + let result_with = Html5_checker.check ~collect_parse_errors:true (reader_of_string html) in 236 + let msgs_with = Html5_checker.messages result_with in 237 + Printf.printf " With parse errors: %d message(s)\n" (List.length msgs_with); 238 + 239 + let result_without = Html5_checker.check ~collect_parse_errors:false (reader_of_string html) in 240 + let msgs_without = Html5_checker.messages result_without in 241 + Printf.printf " Without parse errors: %d message(s)\n" (List.length msgs_without) 242 + 243 + (** Test document accessor *) 244 + let test_document_accessor () = 245 + Printf.printf "\nTest 14: document accessor\n"; 246 + let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in 247 + let reader = reader_of_string html in 248 + let result = Html5_checker.check reader in 249 + let _doc = Html5_checker.document result in 250 + Printf.printf " OK: document accessor works\n" 251 + 252 + (** Test message severity filtering *) 253 + let test_severity_filtering () = 254 + Printf.printf "\nTest 15: Message severity filtering\n"; 255 + let html = "<!DOCTYPE html><html><body><p>Test</p></body></html>" in 256 + let reader = reader_of_string html in 257 + let result = Html5_checker.check reader in 258 + 259 + let all_msgs = Html5_checker.messages result in 260 + let errors = Html5_checker.errors result in 261 + let warnings = Html5_checker.warnings result in 262 + 263 + Printf.printf " Total messages: %d\n" (List.length all_msgs); 264 + Printf.printf " Errors: %d\n" (List.length errors); 265 + Printf.printf " Warnings: %d\n" (List.length warnings); 266 + 267 + (* Verify that errors + warnings <= all messages *) 268 + if List.length errors + List.length warnings <= List.length all_msgs then 269 + Printf.printf " OK: Message counts are consistent\n" 270 + else 271 + Printf.printf " Warning: Message counts inconsistent\n" 272 + 273 + (** Run all tests *) 274 + let () = 275 + Printf.printf "Running html5_checker tests...\n"; 276 + Printf.printf "========================================\n\n"; 277 + 278 + test_valid_html5 (); 279 + test_missing_doctype (); 280 + test_obsolete_element (); 281 + test_duplicate_id (); 282 + test_heading_skip (); 283 + test_img_without_alt (); 284 + test_invalid_nesting (); 285 + test_form_nesting (); 286 + test_output_formats (); 287 + test_has_errors (); 288 + test_check_dom (); 289 + test_system_id (); 290 + test_collect_parse_errors_flag (); 291 + test_document_accessor (); 292 + test_severity_filtering (); 293 + 294 + Printf.printf "\n========================================\n"; 295 + Printf.printf "All tests completed!\n"; 296 + Printf.printf "\nNote: Some checkers may not be enabled yet.\n"; 297 + Printf.printf "Tests marked with 'Note:' indicate features that may be\n"; 298 + Printf.printf "implemented in future versions.\n"
+126
test/test_nesting_checker.ml
··· 1 + (** Simple test for nesting_checker functionality *) 2 + 3 + let () = 4 + (* Create a message collector *) 5 + let collector = Html5_checker.Message_collector.create () in 6 + 7 + (* Get the nesting checker *) 8 + let module C = (val Html5_checker__Nesting_checker.checker : Html5_checker__Checker.S) in 9 + let state = C.create () in 10 + 11 + (* Test 1: <a> cannot contain another <a> *) 12 + Printf.printf "Test 1: Checking <a href> inside <a href>\n"; 13 + C.start_element state ~name:"a" ~namespace:None ~attrs:[("href", "#")] collector; 14 + C.start_element state ~name:"a" ~namespace:None ~attrs:[("href", "#")] collector; 15 + 16 + let errors1 = Html5_checker.Message_collector.errors collector in 17 + Printf.printf " Found %d error(s)\n" (List.length errors1); 18 + List.iter (fun msg -> 19 + Printf.printf " - %s\n" msg.Html5_checker.Message.message 20 + ) errors1; 21 + 22 + C.end_element state ~name:"a" ~namespace:None collector; 23 + C.end_element state ~name:"a" ~namespace:None collector; 24 + Html5_checker.Message_collector.clear collector; 25 + 26 + (* Test 2: <button> inside <a> *) 27 + Printf.printf "\nTest 2: Checking <button> inside <a href>\n"; 28 + C.start_element state ~name:"a" ~namespace:None ~attrs:[("href", "#")] collector; 29 + C.start_element state ~name:"button" ~namespace:None ~attrs:[] collector; 30 + 31 + let errors2 = Html5_checker.Message_collector.errors collector in 32 + Printf.printf " Found %d error(s)\n" (List.length errors2); 33 + List.iter (fun msg -> 34 + Printf.printf " - %s\n" msg.Html5_checker.Message.message 35 + ) errors2; 36 + 37 + C.end_element state ~name:"button" ~namespace:None collector; 38 + C.end_element state ~name:"a" ~namespace:None collector; 39 + Html5_checker.Message_collector.clear collector; 40 + 41 + (* Test 3: form inside form *) 42 + Printf.printf "\nTest 3: Checking <form> inside <form>\n"; 43 + C.start_element state ~name:"form" ~namespace:None ~attrs:[] collector; 44 + C.start_element state ~name:"form" ~namespace:None ~attrs:[] collector; 45 + 46 + let errors3 = Html5_checker.Message_collector.errors collector in 47 + Printf.printf " Found %d error(s)\n" (List.length errors3); 48 + List.iter (fun msg -> 49 + Printf.printf " - %s\n" msg.Html5_checker.Message.message 50 + ) errors3; 51 + 52 + C.end_element state ~name:"form" ~namespace:None collector; 53 + C.end_element state ~name:"form" ~namespace:None collector; 54 + Html5_checker.Message_collector.clear collector; 55 + 56 + (* Test 4: header inside footer *) 57 + Printf.printf "\nTest 4: Checking <header> inside <footer>\n"; 58 + C.start_element state ~name:"footer" ~namespace:None ~attrs:[] collector; 59 + C.start_element state ~name:"header" ~namespace:None ~attrs:[] collector; 60 + 61 + let errors4 = Html5_checker.Message_collector.errors collector in 62 + Printf.printf " Found %d error(s)\n" (List.length errors4); 63 + List.iter (fun msg -> 64 + Printf.printf " - %s\n" msg.Html5_checker.Message.message 65 + ) errors4; 66 + 67 + C.end_element state ~name:"header" ~namespace:None collector; 68 + C.end_element state ~name:"footer" ~namespace:None collector; 69 + Html5_checker.Message_collector.clear collector; 70 + 71 + (* Test 5: input (not hidden) inside button *) 72 + Printf.printf "\nTest 5: Checking <input type=text> inside <button>\n"; 73 + C.start_element state ~name:"button" ~namespace:None ~attrs:[] collector; 74 + C.start_element state ~name:"input" ~namespace:None ~attrs:[("type", "text")] collector; 75 + 76 + let errors5 = Html5_checker.Message_collector.errors collector in 77 + Printf.printf " Found %d error(s)\n" (List.length errors5); 78 + List.iter (fun msg -> 79 + Printf.printf " - %s\n" msg.Html5_checker.Message.message 80 + ) errors5; 81 + 82 + C.end_element state ~name:"input" ~namespace:None collector; 83 + C.end_element state ~name:"button" ~namespace:None collector; 84 + Html5_checker.Message_collector.clear collector; 85 + 86 + (* Test 6: valid nesting - should not error *) 87 + Printf.printf "\nTest 6: Checking valid nesting: <div> inside <div>\n"; 88 + C.start_element state ~name:"div" ~namespace:None ~attrs:[] collector; 89 + C.start_element state ~name:"div" ~namespace:None ~attrs:[] collector; 90 + 91 + let errors6 = Html5_checker.Message_collector.errors collector in 92 + Printf.printf " Found %d error(s)\n" (List.length errors6); 93 + if List.length errors6 = 0 then 94 + Printf.printf " OK: No errors as expected\n"; 95 + 96 + C.end_element state ~name:"div" ~namespace:None collector; 97 + C.end_element state ~name:"div" ~namespace:None collector; 98 + Html5_checker.Message_collector.clear collector; 99 + 100 + (* Test 7: area without map ancestor *) 101 + Printf.printf "\nTest 7: Checking <area> without <map> ancestor\n"; 102 + C.start_element state ~name:"area" ~namespace:None ~attrs:[] collector; 103 + 104 + let errors7 = Html5_checker.Message_collector.errors collector in 105 + Printf.printf " Found %d error(s)\n" (List.length errors7); 106 + List.iter (fun msg -> 107 + Printf.printf " - %s\n" msg.Html5_checker.Message.message 108 + ) errors7; 109 + 110 + C.end_element state ~name:"area" ~namespace:None collector; 111 + Html5_checker.Message_collector.clear collector; 112 + 113 + (* Test 8: area with map ancestor (valid) *) 114 + Printf.printf "\nTest 8: Checking <area> with <map> ancestor (valid)\n"; 115 + C.start_element state ~name:"map" ~namespace:None ~attrs:[] collector; 116 + C.start_element state ~name:"area" ~namespace:None ~attrs:[] collector; 117 + 118 + let errors8 = Html5_checker.Message_collector.errors collector in 119 + Printf.printf " Found %d error(s)\n" (List.length errors8); 120 + if List.length errors8 = 0 then 121 + Printf.printf " OK: No errors as expected\n"; 122 + 123 + C.end_element state ~name:"area" ~namespace:None collector; 124 + C.end_element state ~name:"map" ~namespace:None collector; 125 + 126 + Printf.printf "\nAll tests completed!\n"