+113
lib/html5_checker/specialized/unknown_element_checker.ml
+113
lib/html5_checker/specialized/unknown_element_checker.ml
···
···
1
+
(** Unknown HTML element checker.
2
+
3
+
Detects elements that are not in the HTML5 specification and produces
4
+
appropriate error messages. Custom elements (with hyphens) are allowed. *)
5
+
6
+
(** Set of all known HTML5 element names. *)
7
+
let known_elements =
8
+
let elements = [
9
+
(* Document metadata *)
10
+
"html"; "head"; "title"; "base"; "link"; "meta"; "style";
11
+
12
+
(* Sections *)
13
+
"body"; "article"; "section"; "nav"; "aside"; "h1"; "h2"; "h3"; "h4"; "h5"; "h6";
14
+
"hgroup"; "header"; "footer"; "address"; "main";
15
+
16
+
(* Grouping content *)
17
+
"p"; "hr"; "pre"; "blockquote"; "ol"; "ul"; "menu"; "li"; "dl"; "dt"; "dd";
18
+
"figure"; "figcaption"; "div";
19
+
20
+
(* Text-level semantics *)
21
+
"a"; "em"; "strong"; "small"; "s"; "cite"; "q"; "dfn"; "abbr"; "ruby"; "rt"; "rp";
22
+
"data"; "time"; "code"; "var"; "samp"; "kbd"; "sub"; "sup"; "i"; "b"; "u"; "mark";
23
+
"bdi"; "bdo"; "span"; "br"; "wbr"; "search";
24
+
25
+
(* Edits *)
26
+
"ins"; "del";
27
+
28
+
(* Embedded content *)
29
+
"picture"; "source"; "img"; "iframe"; "embed"; "object"; "video"; "audio";
30
+
"track"; "map"; "area"; "math"; "svg";
31
+
32
+
(* Tables *)
33
+
"table"; "caption"; "colgroup"; "col"; "tbody"; "thead"; "tfoot"; "tr"; "td"; "th";
34
+
35
+
(* Forms *)
36
+
"form"; "label"; "input"; "button"; "select"; "datalist"; "optgroup"; "option";
37
+
"textarea"; "output"; "progress"; "meter"; "fieldset"; "legend";
38
+
39
+
(* Interactive *)
40
+
"details"; "summary"; "dialog";
41
+
42
+
(* Scripting *)
43
+
"script"; "noscript"; "template"; "slot"; "canvas";
44
+
45
+
(* Deprecated but still recognized *)
46
+
"param";
47
+
] in
48
+
let tbl = Hashtbl.create (List.length elements) in
49
+
List.iter (fun el -> Hashtbl.add tbl el ()) elements;
50
+
tbl
51
+
52
+
(** Check if an element name is a custom element (contains hyphen). *)
53
+
let is_custom_element name =
54
+
String.contains name '-'
55
+
56
+
(** Check if an element name is known. *)
57
+
let is_known_element name =
58
+
let name_lower = String.lowercase_ascii name in
59
+
Hashtbl.mem known_elements name_lower || is_custom_element name_lower
60
+
61
+
type state = {
62
+
mutable stack : string list; (* Parent element stack *)
63
+
}
64
+
65
+
let create () = { stack = [] }
66
+
67
+
let reset state =
68
+
state.stack <- []
69
+
70
+
let start_element state ~name ~namespace ~attrs:_ collector =
71
+
(* Only check HTML namespace elements *)
72
+
match namespace with
73
+
| Some _ -> () (* Skip SVG, MathML, etc. *)
74
+
| None ->
75
+
let name_lower = String.lowercase_ascii name in
76
+
77
+
(* Check if element is unknown *)
78
+
if not (is_known_element name_lower) then begin
79
+
(* Get the parent element name *)
80
+
let parent = match state.stack with
81
+
| p :: _ -> p
82
+
| [] -> "document"
83
+
in
84
+
(* Produce error: unknown element not allowed as child *)
85
+
Message_collector.add_typed collector
86
+
(Error_code.Element_not_allowed_as_child { child = name; parent })
87
+
end;
88
+
89
+
(* Always push to stack for tracking *)
90
+
state.stack <- name_lower :: state.stack
91
+
92
+
let end_element state ~name:_ ~namespace _ =
93
+
match namespace with
94
+
| Some _ -> ()
95
+
| None ->
96
+
match state.stack with
97
+
| _ :: rest -> state.stack <- rest
98
+
| [] -> () (* Stack underflow - shouldn't happen *)
99
+
100
+
let characters _state _text _collector = ()
101
+
102
+
let end_document _state _collector = ()
103
+
104
+
let checker =
105
+
(module struct
106
+
type nonrec state = state
107
+
let create = create
108
+
let reset = reset
109
+
let start_element = start_element
110
+
let end_element = end_element
111
+
let characters = characters
112
+
let end_document = end_document
113
+
end : Checker.S)