OCaml HTML5 parser/serialiser based on Python's JustHTML

more

+14
lib/htmlrw_check/attr_utils.ml
··· 17 17 match get_attr name attrs with 18 18 | Some v -> String.trim v <> "" 19 19 | None -> false 20 + 21 + (** Create a unit hashtable from a list of keys for O(1) membership testing. *) 22 + let hashtbl_of_list items = 23 + let tbl = Hashtbl.create (List.length items) in 24 + List.iter (fun x -> Hashtbl.add tbl x ()) items; 25 + tbl 26 + 27 + (** Check a list of attributes and report errors for any that are present. *) 28 + let check_disallowed_attrs ~element ~collector ~attrs disallowed = 29 + List.iter (fun attr -> 30 + if has_attr attr attrs then 31 + Message_collector.add_typed collector 32 + (`Attr (`Not_allowed (`Attr attr, `Elem element))) 33 + ) disallowed
+25
lib/htmlrw_check/attr_utils.mli
··· 53 53 @param name The attribute name to look for (lowercase) 54 54 @param attrs The attribute list 55 55 @return [true] if the attribute exists and has a non-empty value *) 56 + 57 + (** {1 Utility Functions} *) 58 + 59 + val hashtbl_of_list : 'a list -> ('a, unit) Hashtbl.t 60 + (** [hashtbl_of_list items] creates a hashtable for O(1) membership testing. 61 + 62 + @param items List of keys to add 63 + @return A hashtable where each item maps to unit *) 64 + 65 + val check_disallowed_attrs : 66 + element:string -> 67 + collector:Message_collector.t -> 68 + attrs:attrs -> 69 + string list -> 70 + unit 71 + (** [check_disallowed_attrs ~element ~collector ~attrs disallowed] reports 72 + errors for any disallowed attributes that are present. 73 + 74 + This is a convenience function to reduce repetitive attribute checking 75 + code in checkers. 76 + 77 + @param element The element name for error messages 78 + @param collector The message collector 79 + @param attrs The attribute list to check 80 + @param disallowed List of attribute names that are not allowed *)
+25
lib/htmlrw_check/checker.ml
··· 67 67 | Some f -> f 68 68 | None -> fun _ _ -> () 69 69 end 70 + 71 + (** Create a checker from individual callback functions. 72 + This eliminates the boilerplate module wrapper at the end of each checker. *) 73 + let make 74 + (type s) 75 + ~(create : unit -> s) 76 + ~(reset : s -> unit) 77 + ~(start_element : s -> element:Element.t -> Message_collector.t -> unit) 78 + ~(end_element : s -> tag:Tag.element_tag -> Message_collector.t -> unit) 79 + ?(characters : (s -> string -> Message_collector.t -> unit) option) 80 + ?(end_document : (s -> Message_collector.t -> unit) option) 81 + () : t = 82 + (module struct 83 + type state = s 84 + let create = create 85 + let reset = reset 86 + let start_element = start_element 87 + let end_element = end_element 88 + let characters = match characters with 89 + | Some f -> f 90 + | None -> fun _ _ _ -> () 91 + let end_document = match end_document with 92 + | Some f -> f 93 + | None -> fun _ _ -> () 94 + end : S)
+32
lib/htmlrw_check/checker.mli
··· 214 214 ]} 215 215 *) 216 216 module Make : functor (I : Input) -> S with type state = I.state 217 + 218 + (** Create a checker from individual callback functions. 219 + 220 + This is a simpler alternative to the [Make] functor that eliminates the 221 + need for a module wrapper at the end of each checker file. 222 + 223 + {b Example:} 224 + {[ 225 + let checker = Checker.make 226 + ~create:(fun () -> { count = 0 }) 227 + ~reset:(fun s -> s.count <- 0) 228 + ~start_element:(fun s ~element collector -> ...) 229 + ~end_element:(fun s ~tag collector -> ...) 230 + () 231 + ]} 232 + 233 + @param create State initialization function 234 + @param reset State reset function 235 + @param start_element Element start callback 236 + @param end_element Element end callback 237 + @param characters Optional text content callback (default: no-op) 238 + @param end_document Optional document end callback (default: no-op) 239 + *) 240 + val make : 241 + create:(unit -> 's) -> 242 + reset:('s -> unit) -> 243 + start_element:('s -> element:Element.t -> Message_collector.t -> unit) -> 244 + end_element:('s -> tag:Tag.element_tag -> Message_collector.t -> unit) -> 245 + ?characters:('s -> string -> Message_collector.t -> unit) -> 246 + ?end_document:('s -> Message_collector.t -> unit) -> 247 + unit -> 248 + t
+2 -11
lib/htmlrw_check/content_model/content_checker.ml
··· 197 197 state.ancestor_stack 198 198 199 199 (* Package as first-class module *) 200 - let checker = 201 - (module struct 202 - type nonrec state = state 203 - 204 - let create = create 205 - let reset = reset 206 - let start_element = start_element 207 - let end_element = end_element 208 - let characters = characters 209 - let end_document = end_document 210 - end : Checker.S) 200 + let checker = Checker.make ~create ~reset ~start_element ~end_element 201 + ~characters ~end_document ()
-30
lib/htmlrw_check/content_model/content_checker.mli
··· 18 18 2. Checking each child element or text node against the content model 19 19 3. Tracking the ancestor stack to detect prohibited relationships 20 20 4. Emitting appropriate errors or warnings for violations 21 - 22 - {2 Usage Example} 23 - 24 - {[ 25 - let checker = Content_checker.create (Message_collector.create ()) in 26 - let module C = (val checker : Checker.S) in 27 - let state = C.create () in 28 - 29 - (* Walk the DOM tree *) 30 - C.start_element state ~name:"div" ~namespace:None ~attrs:[] collector; 31 - C.characters state "Hello, world!" collector; 32 - C.end_element state ~name:"div" ~namespace:None collector; 33 - C.end_document state collector 34 - ]} 35 21 *) 36 - 37 - (** Include the standard checker signature. *) 38 - include Checker.S 39 - 40 - (** {1 Creation} *) 41 - 42 - val create_with_registry : ?registry:Element_registry.t -> Message_collector.t -> state 43 - (** [create_with_registry ?registry collector] creates a content checker with an 44 - optional custom element registry. 45 - 46 - If no registry is provided, uses {!Element_registry.default}. 47 - 48 - @param registry Custom element registry (defaults to standard HTML5 elements) 49 - @param collector Message collector for validation messages *) 50 - 51 - (** {1 First-Class Module} *) 52 22 53 23 val checker : Checker.t 54 24 (** [checker] is the content checker packaged as a first-class module.
+1 -14
lib/htmlrw_check/semantic/autofocus_checker.ml
··· 75 75 76 76 state.current_depth <- state.current_depth - 1 77 77 78 - let characters _state _text _collector = () 79 - 80 - let end_document _state _collector = () 81 - 82 - let checker = 83 - (module struct 84 - type nonrec state = state 85 - let create = create 86 - let reset = reset 87 - let start_element = start_element 88 - let end_element = end_element 89 - let characters = characters 90 - let end_document = end_document 91 - end : Checker.S) 78 + let checker = Checker.make ~create ~reset ~start_element ~end_element ()
+2 -15
lib/htmlrw_check/semantic/form_checker.ml
··· 4 4 checks (like button-outside-form and label references) don't match 5 5 Nu validator's behavior. *) 6 6 7 - type state = unit 7 + type state = unit [@@warning "-34"] 8 8 9 9 let create () = () 10 10 ··· 44 44 45 45 let end_element _state ~tag:_ _collector = () 46 46 47 - let characters _state _text _collector = () 48 - 49 - let end_document _state _collector = () 50 - 51 - let checker = (module struct 52 - type nonrec state = state 53 - 54 - let create = create 55 - let reset = reset 56 - let start_element = start_element 57 - let end_element = end_element 58 - let characters = characters 59 - let end_document = end_document 60 - end : Checker.S) 47 + let checker = Checker.make ~create ~reset ~start_element ~end_element ()
-2
lib/htmlrw_check/semantic/form_checker.mli
··· 50 50 @see <https://www.w3.org/WAI/WCAG21/Understanding/labels-or-instructions.html> 51 51 WCAG: Labels or Instructions *) 52 52 53 - include Checker.S 54 - 55 53 val checker : Checker.t 56 54 (** A first-class module instance of this checker. 57 55
+3 -15
lib/htmlrw_check/semantic/id_checker.ml
··· 193 193 | _ -> ()) 194 194 | _ -> ()) 195 195 196 - let end_element _state ~tag:_ _collector = 197 - () 198 - 199 - let characters _state _text _collector = 200 - () 196 + let end_element _state ~tag:_ _collector = () 201 197 202 198 let end_document state collector = 203 199 (* Check all ID references point to existing IDs *) ··· 224 220 (Error_code.q ref.attribute) (Error_code.q ref.referring_element) (Error_code.q ref.referenced_id))) 225 221 ) state.usemap_references 226 222 227 - let checker = (module struct 228 - type nonrec state = state 229 - 230 - let create = create 231 - let reset = reset 232 - let start_element = start_element 233 - let end_element = end_element 234 - let characters = characters 235 - let end_document = end_document 236 - end : Checker.S) 223 + let checker = Checker.make ~create ~reset ~start_element ~end_element 224 + ~end_document ()
-2
lib/htmlrw_check/semantic/id_checker.mli
··· 5 5 - ID references (for, headers, aria-*, etc.) point to existing IDs 6 6 - ID values conform to HTML5 requirements *) 7 7 8 - include Checker.S 9 - 10 8 val checker : Checker.t 11 9 (** [checker] is a checker instance for validating ID uniqueness and references. *)
+2 -10
lib/htmlrw_check/semantic/lang_detecting_checker.ml
··· 326 326 | _ -> () 327 327 end 328 328 329 - let checker = 330 - (module struct 331 - type nonrec state = state 332 - let create = create 333 - let reset = reset 334 - let start_element = start_element 335 - let end_element = end_element 336 - let characters = characters 337 - let end_document = end_document 338 - end : Checker.S) 329 + let checker = Checker.make ~create ~reset ~start_element ~end_element 330 + ~characters ~end_document ()
+1 -17
lib/htmlrw_check/semantic/nesting_checker.ml
··· 350 350 end 351 351 | _ -> () 352 352 353 - let characters _state _text _collector = 354 - () (* No text-specific nesting checks *) 355 - 356 - let end_document _state _collector = 357 - () (* No document-level checks needed *) 358 - 359 353 (** Create the checker as a first-class module. *) 360 - let checker = 361 - let module M = struct 362 - type nonrec state = state 363 - let create = create 364 - let reset = reset 365 - let start_element = start_element 366 - let end_element = end_element 367 - let characters = characters 368 - let end_document = end_document 369 - end in 370 - (module M : Checker.S) 354 + let checker = Checker.make ~create ~reset ~start_element ~end_element ()
-2
lib/htmlrw_check/semantic/nesting_checker.mli
··· 73 73 HTML5 specification: Content models 74 74 *) 75 75 76 - include Checker.S 77 - 78 76 val checker : Checker.t 79 77 (** [checker] is a checker instance for validating element nesting rules. *)
+1 -15
lib/htmlrw_check/semantic/obsolete_checker.ml
··· 315 315 | Tag.Html `Head -> state.in_head <- false 316 316 | _ -> () 317 317 318 - let characters _state _text _collector = () 319 - 320 - let end_document _state _collector = () 321 - 322 - let checker = 323 - let module M = struct 324 - type nonrec state = state 325 - let create = create 326 - let reset = reset 327 - let start_element = start_element 328 - let end_element = end_element 329 - let characters = characters 330 - let end_document = end_document 331 - end in 332 - (module M : Checker.S) 318 + let checker = Checker.make ~create ~reset ~start_element ~end_element ()
-5
lib/htmlrw_check/semantic/obsolete_checker.mli
··· 52 52 ]} 53 53 *) 54 54 55 - (** Include the standard checker signature. *) 56 - include Checker.S 57 - 58 - (** {1 Checker Instance} *) 59 - 60 55 val checker : Checker.t 61 56 (** [checker] is a pre-configured obsolete checker instance that can be 62 57 registered with the checker registry.
+2 -12
lib/htmlrw_check/semantic/option_checker.ml
··· 64 64 | [] -> () 65 65 end 66 66 67 - let end_document _state _collector = () 68 - 69 - let checker = 70 - (module struct 71 - type nonrec state = state 72 - let create = create 73 - let reset = reset 74 - let start_element = start_element 75 - let end_element = end_element 76 - let characters = characters 77 - let end_document = end_document 78 - end : Checker.S) 67 + let checker = Checker.make ~create ~reset ~start_element ~end_element 68 + ~characters ()
+1 -14
lib/htmlrw_check/semantic/required_attr_checker.ml
··· 204 204 | Tag.Html `A -> state.in_a_with_href <- false 205 205 | _ -> () 206 206 207 - let characters _state _text _collector = () 208 - 209 - let end_document _state _collector = () 210 - 211 - let checker = (module struct 212 - type nonrec state = state 213 - 214 - let create = create 215 - let reset = reset 216 - let start_element = start_element 217 - let end_element = end_element 218 - let characters = characters 219 - let end_document = end_document 220 - end : Checker.S) 207 + let checker = Checker.make ~create ~reset ~start_element ~end_element ()
-2
lib/htmlrw_check/semantic/required_attr_checker.mli
··· 38 38 @see <https://html.spec.whatwg.org/multipage/indices.html#attributes-3> 39 39 WHATWG HTML: Attributes *) 40 40 41 - include Checker.S 42 - 43 41 val checker : Checker.t 44 42 (** A first-class module instance of this checker. 45 43
+2 -11
lib/htmlrw_check/specialized/aria_checker.ml
··· 776 776 | [] -> ()) (* Stack underflow - shouldn't happen in well-formed docs *) 777 777 | _ -> () 778 778 779 - let characters _state _text _collector = () 780 - 781 779 let end_document state collector = 782 780 (* Check that active tabs have corresponding tabpanels *) 783 781 if state.has_active_tab && not state.has_tabpanel then ··· 787 785 if state.visible_main_count > 1 then 788 786 Message_collector.add_typed collector (`Aria `Multiple_main) 789 787 790 - let checker = (module struct 791 - type nonrec state = state 792 - let create = create 793 - let reset = reset 794 - let start_element = start_element 795 - let end_element = end_element 796 - let characters = characters 797 - let end_document = end_document 798 - end : Checker.S) 788 + let checker = Checker.make ~create ~reset ~start_element ~end_element 789 + ~end_document ()
-2
lib/htmlrw_check/specialized/aria_checker.mli
··· 96 96 WAI-ARIA 1.2 specification 97 97 *) 98 98 99 - include Checker.S 100 - 101 99 val checker : Checker.t 102 100 (** [checker] is a checker instance for validating ARIA roles and attributes. *)
+1 -12
lib/htmlrw_check/specialized/attr_restrictions_checker.ml
··· 468 468 | _ -> () (* Skip non-HTML elements *) 469 469 470 470 let end_element _state ~tag:_ _collector = () 471 - let characters _state _text _collector = () 472 - let end_document _state _collector = () 473 471 474 - let checker = 475 - (module struct 476 - type nonrec state = state 477 - let create = create 478 - let reset = reset 479 - let start_element = start_element 480 - let end_element = end_element 481 - let characters = characters 482 - let end_document = end_document 483 - end : Checker.S) 472 + let checker = Checker.make ~create ~reset ~start_element ~end_element ()
+1 -12
lib/htmlrw_check/specialized/base_checker.ml
··· 26 26 | _ -> () 27 27 28 28 let end_element _state ~tag:_ _collector = () 29 - let characters _state _text _collector = () 30 - let end_document _state _collector = () 31 29 32 - let checker = 33 - (module struct 34 - type nonrec state = state 35 - let create = create 36 - let reset = reset 37 - let start_element = start_element 38 - let end_element = end_element 39 - let characters = characters 40 - let end_document = end_document 41 - end : Checker.S) 30 + let checker = Checker.make ~create ~reset ~start_element ~end_element ()
+2 -13
lib/htmlrw_check/specialized/datetime_checker.ml
··· 439 439 end 440 440 441 441 (** Checker state *) 442 - type state = unit 442 + type state = unit [@@warning "-34"] 443 443 444 444 let create () = () 445 445 let reset _state = () ··· 470 470 | _ -> () (* Non-HTML elements don't have datetime attributes *) 471 471 472 472 let end_element _state ~tag:_ _collector = () 473 - let characters _state _text _collector = () 474 - let end_document _state _collector = () 475 473 476 - let checker = 477 - (module struct 478 - type nonrec state = state 479 - let create = create 480 - let reset = reset 481 - let start_element = start_element 482 - let end_element = end_element 483 - let characters = characters 484 - let end_document = end_document 485 - end : Checker.S) 474 + let checker = Checker.make ~create ~reset ~start_element ~end_element ()
+2 -12
lib/htmlrw_check/specialized/dl_checker.ml
··· 254 254 end 255 255 end 256 256 257 - let end_document _state _collector = () 258 - 259 - let checker = 260 - (module struct 261 - type nonrec state = state 262 - let create = create 263 - let reset = reset 264 - let start_element = start_element 265 - let end_element = end_element 266 - let characters = characters 267 - let end_document = end_document 268 - end : Checker.S) 257 + let checker = Checker.make ~create ~reset ~start_element ~end_element 258 + ~characters ()
+1 -13
lib/htmlrw_check/specialized/h1_checker.ml
··· 34 34 state.svg_depth <- state.svg_depth - 1 35 35 | _ -> () 36 36 37 - let characters _state _text _collector = () 38 - let end_document _state _collector = () 39 - 40 - let checker = 41 - (module struct 42 - type nonrec state = state 43 - let create = create 44 - let reset = reset 45 - let start_element = start_element 46 - let end_element = end_element 47 - let characters = characters 48 - let end_document = end_document 49 - end : Checker.S) 37 + let checker = Checker.make ~create ~reset ~start_element ~end_element ()
+2 -10
lib/htmlrw_check/specialized/heading_checker.ml
··· 126 126 Message_collector.add_typed collector 127 127 (`Generic "Document contains no heading elements (h1-h6). Headings provide important document structure for accessibility") 128 128 129 - let checker = (module struct 130 - type nonrec state = state 131 - 132 - let create = create 133 - let reset = reset 134 - let start_element = start_element 135 - let end_element = end_element 136 - let characters = characters 137 - let end_document = end_document 138 - end : Checker.S) 129 + let checker = Checker.make ~create ~reset ~start_element ~end_element 130 + ~characters ~end_document ()
-2
lib/htmlrw_check/specialized/heading_checker.mli
··· 6 6 - Multiple h1 usage patterns 7 7 - Headings should not be empty *) 8 8 9 - include Checker.S 10 - 11 9 val checker : Checker.t 12 10 (** [checker] is a checker instance for validating heading structure. *)
+2 -12
lib/htmlrw_check/specialized/importmap_checker.ml
··· 307 307 if state.in_importmap then 308 308 Buffer.add_string state.content text 309 309 310 - let end_document _state _collector = () 311 - 312 - let checker = 313 - (module struct 314 - type nonrec state = state 315 - let create = create 316 - let reset = reset 317 - let start_element = start_element 318 - let end_element = end_element 319 - let characters = characters 320 - let end_document = end_document 321 - end : Checker.S) 310 + let checker = Checker.make ~create ~reset ~start_element ~end_element 311 + ~characters ()
+7 -24
lib/htmlrw_check/specialized/label_checker.ml
··· 5 5 (** Labelable elements that label can reference *) 6 6 let labelable_elements = ["button"; "input"; "meter"; "output"; "progress"; "select"; "textarea"] 7 7 8 - (** Helper to get attribute value *) 9 - let get_attr attrs name = 10 - let name_lower = String.lowercase_ascii name in 11 - List.find_map (fun (n, v) -> 12 - if String.lowercase_ascii n = name_lower then Some v else None 13 - ) attrs 14 - 15 8 type label_for_info = { 16 9 for_target : string; 17 10 has_role : bool; ··· 56 49 state.in_label <- true; 57 50 state.label_depth <- 1; (* Start at 1 for the label element itself *) 58 51 state.labelable_count <- 0; 59 - let for_value = get_attr element.raw_attrs "for" in 60 - let has_role = get_attr element.raw_attrs "role" <> None in 61 - let has_aria_label = get_attr element.raw_attrs "aria-label" <> None in 52 + let for_value = Attr_utils.get_attr "for" element.raw_attrs in 53 + let has_role = Attr_utils.get_attr "role" element.raw_attrs <> None in 54 + let has_aria_label = Attr_utils.get_attr "aria-label" element.raw_attrs <> None in 62 55 state.label_for_value <- for_value; 63 56 state.label_has_role <- has_role; 64 57 state.label_has_aria_label <- has_aria_label; ··· 73 66 74 67 (* Track labelable element IDs *) 75 68 (if List.mem name_lower labelable_elements then 76 - match get_attr element.raw_attrs "id" with 69 + match Attr_utils.get_attr "id" element.raw_attrs with 77 70 | Some id -> state.labelable_ids <- id :: state.labelable_ids 78 71 | None -> ()); 79 72 ··· 89 82 (* Check if label has for attribute and descendant has mismatched id *) 90 83 (match state.label_for_value with 91 84 | Some for_value -> 92 - let descendant_id = get_attr element.raw_attrs "id" in 85 + let descendant_id = Attr_utils.get_attr "id" element.raw_attrs in 93 86 (match descendant_id with 94 87 | None -> 95 88 Message_collector.add_typed collector (`Label `For_id_mismatch) ··· 120 113 | _ -> () 121 114 end 122 115 123 - let characters _state _text _collector = () 124 - 125 116 let end_document state collector = 126 117 List.iter (fun label_info -> 127 118 if List.mem label_info.for_target state.labelable_ids then begin ··· 132 123 end 133 124 ) state.labels_for 134 125 135 - let checker = 136 - (module struct 137 - type nonrec state = state 138 - let create = create 139 - let reset = reset 140 - let start_element = start_element 141 - let end_element = end_element 142 - let characters = characters 143 - let end_document = end_document 144 - end : Checker.S) 126 + let checker = Checker.make ~create ~reset ~start_element ~end_element 127 + ~end_document ()
+3 -21
lib/htmlrw_check/specialized/language_checker.ml
··· 3 3 Validates language attributes. *) 4 4 5 5 (** Checker state - currently minimal since we only check attributes. *) 6 - type state = unit 6 + type state = unit [@@warning "-34"] 7 7 8 8 let create () = () 9 9 ··· 94 94 let name = Tag.tag_to_string element.Element.tag in 95 95 process_language_attrs ~element:name ~namespace:None ~attrs:element.raw_attrs ~location collector 96 96 97 - let end_element _state ~tag:_ _collector = 98 - () 99 - 100 - let characters _state _text _collector = 101 - () 102 - 103 - let end_document _state _collector = 104 - (* Note: The "missing lang on html" warning is only produced for specific 105 - test cases in the Nu validator. We don't produce it by default. *) 106 - () 107 - 108 - let checker = (module struct 109 - type nonrec state = state 97 + let end_element _state ~tag:_ _collector = () 110 98 111 - let create = create 112 - let reset = reset 113 - let start_element = start_element 114 - let end_element = end_element 115 - let characters = characters 116 - let end_document = end_document 117 - end : Checker.S) 99 + let checker = Checker.make ~create ~reset ~start_element ~end_element ()
-2
lib/htmlrw_check/specialized/language_checker.mli
··· 12 12 - Empty lang="" is valid (indicates unknown language) 13 13 - Primary language subtag should be valid *) 14 14 15 - include Checker.S 16 - 17 15 val checker : Checker.t 18 16 (** [checker] is a checker instance for validating language attributes. *)
+2 -13
lib/htmlrw_check/specialized/microdata_checker.ml
··· 288 288 state.scope_stack <- rest 289 289 | _ -> () 290 290 291 - let characters _state _text _collector = 292 - () 293 - 294 291 let end_document state collector = 295 292 (* Check all itemref references point to existing IDs *) 296 293 List.iter (fun ref -> ··· 306 303 (* Detect itemref cycles *) 307 304 detect_itemref_cycles state collector 308 305 309 - let checker = (module struct 310 - type nonrec state = state 311 - 312 - let create = create 313 - let reset = reset 314 - let start_element = start_element 315 - let end_element = end_element 316 - let characters = characters 317 - let end_document = end_document 318 - end : Checker.S) 306 + let checker = Checker.make ~create ~reset ~start_element ~end_element 307 + ~end_document ()
-2
lib/htmlrw_check/specialized/microdata_checker.mli
··· 12 12 - Detects itemref cycles (A references B, B references A) 13 13 - itemprop values must be valid property names (no colons unless URL) *) 14 14 15 - include Checker.S 16 - 17 15 val checker : Checker.t 18 16 (** [checker] is a checker instance for validating microdata. *)
+2 -13
lib/htmlrw_check/specialized/mime_type_checker.ml
··· 148 148 ("object", ["type"]); 149 149 ] 150 150 151 - type state = unit 151 + type state = unit [@@warning "-34"] 152 152 153 153 let create () = () 154 154 let reset _state = () ··· 193 193 | _ -> () (* Non-HTML elements don't have MIME type checks *) 194 194 195 195 let end_element _state ~tag:_ _collector = () 196 - let characters _state _text _collector = () 197 - let end_document _state _collector = () 198 196 199 - let checker = 200 - (module struct 201 - type nonrec state = state 202 - let create = create 203 - let reset = reset 204 - let start_element = start_element 205 - let end_element = end_element 206 - let characters = characters 207 - let end_document = end_document 208 - end : Checker.S) 197 + let checker = Checker.make ~create ~reset ~start_element ~end_element ()
+3 -13
lib/htmlrw_check/specialized/normalization_checker.ml
··· 2 2 3 3 Validates that text content is in Unicode Normalization Form C (NFC). *) 4 4 5 - type state = unit 5 + type state = unit [@@warning "-34"] 6 6 7 7 let create () = () 8 8 let reset _state = () ··· 56 56 (`I18n (`Not_nfc (`Replacement replacement))) 57 57 end 58 58 59 - let end_document _state _collector = () 60 - 61 - let checker = 62 - (module struct 63 - type nonrec state = state 64 - let create = create 65 - let reset = reset 66 - let start_element = start_element 67 - let end_element = end_element 68 - let characters = characters 69 - let end_document = end_document 70 - end : Checker.S) 59 + let checker = Checker.make ~create ~reset ~start_element ~end_element 60 + ~characters ()
+2 -12
lib/htmlrw_check/specialized/picture_checker.ml
··· 218 218 (`Element (`Text_not_allowed (`Parent "picture"))) 219 219 end 220 220 221 - let end_document _state _collector = () 222 - 223 - let checker = 224 - (module struct 225 - type nonrec state = state 226 - let create = create 227 - let reset = reset 228 - let start_element = start_element 229 - let end_element = end_element 230 - let characters = characters 231 - let end_document = end_document 232 - end : Checker.S) 221 + let checker = Checker.make ~create ~reset ~start_element ~end_element 222 + ~characters ()
+2 -12
lib/htmlrw_check/specialized/ruby_checker.ml
··· 117 117 | [] -> () 118 118 end 119 119 120 - let end_document _state _collector = () 121 - 122 - let checker = 123 - (module struct 124 - type nonrec state = state 125 - let create = create 126 - let reset = reset 127 - let start_element = start_element 128 - let end_element = end_element 129 - let characters = characters 130 - let end_document = end_document 131 - end : Checker.S) 120 + let checker = Checker.make ~create ~reset ~start_element ~end_element 121 + ~characters ()
+5 -26
lib/htmlrw_check/specialized/source_checker.ml
··· 35 35 let ctx = current_context state in 36 36 (match ctx with 37 37 | Video | Audio -> 38 - if Attr_utils.has_attr "srcset" element.raw_attrs then 39 - Message_collector.add_typed collector 40 - (`Attr (`Not_allowed (`Attr "srcset", `Elem "source"))); 41 - if Attr_utils.has_attr "sizes" element.raw_attrs then 42 - Message_collector.add_typed collector 43 - (`Attr (`Not_allowed (`Attr "sizes", `Elem "source"))); 44 - if Attr_utils.has_attr "width" element.raw_attrs then 45 - Message_collector.add_typed collector 46 - (`Attr (`Not_allowed (`Attr "width", `Elem "source"))); 47 - if Attr_utils.has_attr "height" element.raw_attrs then 48 - Message_collector.add_typed collector 49 - (`Attr (`Not_allowed (`Attr "height", `Elem "source"))) 38 + (* These attributes are only valid on source in picture, not audio/video *) 39 + Attr_utils.check_disallowed_attrs 40 + ~element:"source" ~collector ~attrs:element.raw_attrs 41 + ["srcset"; "sizes"; "width"; "height"] 50 42 | Picture | Other -> ()) 51 43 | _ -> () 52 44 ··· 58 50 | [] -> ()) 59 51 | _ -> () 60 52 61 - let characters _state _text _collector = () 62 - 63 - let end_document _state _collector = () 64 - 65 - let checker = 66 - (module struct 67 - type nonrec state = state 68 - let create = create 69 - let reset = reset 70 - let start_element = start_element 71 - let end_element = end_element 72 - let characters = characters 73 - let end_document = end_document 74 - end : Checker.S) 53 + let checker = Checker.make ~create ~reset ~start_element ~end_element ()
+2 -13
lib/htmlrw_check/specialized/srcset_sizes_checker.ml
··· 12 12 "cm"; "mm"; "q"; "in"; "pc"; "pt"; "px" 13 13 ] 14 14 15 - type state = unit 15 + type state = unit [@@warning "-34"] 16 16 17 17 let create () = () 18 18 let reset _state = () ··· 945 945 | _ -> () (* Other elements *) 946 946 947 947 let end_element _state ~tag:_ _collector = () 948 - let characters _state _text _collector = () 949 - let end_document _state _collector = () 950 948 951 - let checker = 952 - (module struct 953 - type nonrec state = state 954 - let create = create 955 - let reset = reset 956 - let start_element = start_element 957 - let end_element = end_element 958 - let characters = characters 959 - let end_document = end_document 960 - end : Checker.S) 949 + let checker = Checker.make ~create ~reset ~start_element ~end_element ()
+1 -14
lib/htmlrw_check/specialized/svg_checker.ml
··· 506 506 state.in_svg <- false 507 507 end 508 508 509 - let characters _state _text _collector = () 510 - 511 - let end_document _state _collector = () 512 - 513 - let checker = 514 - (module struct 515 - type nonrec state = state 516 - let create = create 517 - let reset = reset 518 - let start_element = start_element 519 - let end_element = end_element 520 - let characters = characters 521 - let end_document = end_document 522 - end : Checker.S) 509 + let checker = Checker.make ~create ~reset ~start_element ~end_element ()
+2 -13
lib/htmlrw_check/specialized/table_checker.ml
··· 735 735 | _ -> ())) 736 736 | _ -> () (* Non-HTML elements *) 737 737 738 - let characters _state _text _collector = () 739 - 740 738 let end_document state collector = 741 739 if !(state.tables) <> [] then 742 740 Message_collector.add_typed collector 743 741 (`Generic "Unclosed table element at end of document.") 744 742 745 - let checker = 746 - (module struct 747 - type nonrec state = state 748 - 749 - let create = create 750 - let reset = reset 751 - let start_element = start_element 752 - let end_element = end_element 753 - let characters = characters 754 - let end_document = end_document 755 - end : Checker.S) 743 + let checker = Checker.make ~create ~reset ~start_element ~end_element 744 + ~end_document ()
-2
lib/htmlrw_check/specialized/table_checker.mli
··· 71 71 @see <https://html.spec.whatwg.org/multipage/tables.html> WHATWG HTML: Tables 72 72 @see <https://www.w3.org/TR/html52/tabular-data.html> W3C HTML 5.2: Tables *) 73 73 74 - include Checker.S 75 - 76 74 val checker : Checker.t 77 75 (** A first-class module instance of this checker. 78 76
+2 -12
lib/htmlrw_check/specialized/title_checker.ml
··· 60 60 state.title_has_content <- true 61 61 end 62 62 63 - let end_document _state _collector = () 64 - 65 - let checker = 66 - (module struct 67 - type nonrec state = state 68 - let create = create 69 - let reset = reset 70 - let start_element = start_element 71 - let end_element = end_element 72 - let characters = characters 73 - let end_document = end_document 74 - end : Checker.S) 63 + let checker = Checker.make ~create ~reset ~start_element ~end_element 64 + ~characters ()
+1 -14
lib/htmlrw_check/specialized/unknown_element_checker.ml
··· 44 44 | [] -> ()) (* Stack underflow - shouldn't happen *) 45 45 | _ -> () (* SVG, MathML, Custom elements *) 46 46 47 - let characters _state _text _collector = () 48 - 49 - let end_document _state _collector = () 50 - 51 - let checker = 52 - (module struct 53 - type nonrec state = state 54 - let create = create 55 - let reset = reset 56 - let start_element = start_element 57 - let end_element = end_element 58 - let characters = characters 59 - let end_document = end_document 60 - end : Checker.S) 47 + let checker = Checker.make ~create ~reset ~start_element ~end_element ()
+2 -13
lib/htmlrw_check/specialized/url_checker.ml
··· 733 733 end 734 734 735 735 (** Checker state. *) 736 - type state = unit 736 + type state = unit [@@warning "-34"] 737 737 738 738 let create () = () 739 739 let reset _state = () ··· 816 816 | _ -> () (* Non-HTML elements *) 817 817 818 818 let end_element _state ~tag:_ _collector = () 819 - let characters _state _text _collector = () 820 - let end_document _state _collector = () 821 819 822 - let checker = 823 - (module struct 824 - type nonrec state = state 825 - let create = create 826 - let reset = reset 827 - let start_element = start_element 828 - let end_element = end_element 829 - let characters = characters 830 - let end_document = end_document 831 - end : Checker.S) 820 + let checker = Checker.make ~create ~reset ~start_element ~end_element ()
+2 -12
lib/htmlrw_check/specialized/xhtml_content_checker.ml
··· 133 133 (`Element (`Text_not_allowed (`Parent parent_lower))) 134 134 end 135 135 136 - let end_document _state _collector = () 137 - 138 - let checker = 139 - (module struct 140 - type nonrec state = state 141 - let create = create 142 - let reset = reset 143 - let start_element = start_element 144 - let end_element = end_element 145 - let characters = characters 146 - let end_document = end_document 147 - end : Checker.S) 136 + let checker = Checker.make ~create ~reset ~start_element ~end_element 137 + ~characters ()