+137
-158
lib/check/semantic/nesting_checker.ml
+137
-158
lib/check/semantic/nesting_checker.ml
···
1
-
(** Interactive element nesting checker implementation. *)
2
3
-
(** Special ancestors that need tracking for nesting validation.
4
5
-
This array defines the elements whose presence in the ancestor chain
6
-
affects validation of descendant elements. The order is significant
7
-
as it determines bit positions in the ancestor bitmask. *)
8
let special_ancestors =
9
[| "a"; "address"; "body"; "button"; "caption"; "dfn"; "dt"; "figcaption";
10
"figure"; "footer"; "form"; "header"; "label"; "map"; "noscript"; "th";
···
13
"s"; "small"; "mark"; "abbr"; "cite"; "code"; "q"; "sub"; "sup"; "samp";
14
"kbd"; "var" |]
15
16
-
(** Hashtable for O(1) lookup of special ancestor bit positions *)
17
let special_ancestor_table : (string, int) Hashtbl.t =
18
let tbl = Hashtbl.create 64 in
19
Array.iteri (fun i name -> Hashtbl.add tbl name i) special_ancestors;
20
tbl
21
22
-
(** Get the bit position for a special ancestor element.
23
-
Returns [-1] if the element is not a special ancestor. O(1) lookup. *)
24
-
let special_ancestor_number name =
25
match Hashtbl.find_opt special_ancestor_table name with
26
| Some i -> i
27
| None -> -1
···
31
[| "a"; "button"; "details"; "embed"; "iframe"; "label"; "select";
32
"textarea" |]
33
34
-
(** Map from descendant element name to bitmask of prohibited ancestors. *)
35
-
let ancestor_mask_by_descendant : (string, int) Hashtbl.t =
36
Hashtbl.create 64
37
38
-
(** Map from descendant element name to bitmask of ancestors that cause content model violations.
39
-
(These use different error messages than nesting violations.) *)
40
-
let content_model_violation_mask : (string, int) Hashtbl.t =
41
Hashtbl.create 64
42
43
(** Register that [ancestor] is prohibited for [descendant]. *)
44
let register_prohibited_ancestor ancestor descendant =
45
-
let number = special_ancestor_number ancestor in
46
-
if number = -1 then
47
failwith ("Ancestor not found in array: " ^ ancestor);
48
-
let mask =
49
-
match Hashtbl.find_opt ancestor_mask_by_descendant descendant with
50
-
| None -> 0
51
-
| Some m -> m
52
-
in
53
-
let new_mask = mask lor (1 lsl number) in
54
-
Hashtbl.replace ancestor_mask_by_descendant descendant new_mask
55
56
(** Register a content model violation (phrasing-only element containing flow content). *)
57
let register_content_model_violation ancestor descendant =
58
register_prohibited_ancestor ancestor descendant;
59
-
let number = special_ancestor_number ancestor in
60
-
let mask =
61
-
match Hashtbl.find_opt content_model_violation_mask descendant with
62
-
| None -> 0
63
-
| Some m -> m
64
-
in
65
-
let new_mask = mask lor (1 lsl number) in
66
-
Hashtbl.replace content_model_violation_mask descendant new_mask
67
68
(** Initialize the prohibited ancestor map. *)
69
let () =
···
133
) interactive_elements;
134
135
(* Phrasing-only elements: cannot contain flow content like p, div, h1-h6, etc. *)
136
-
(* These are content model violations, not nesting violations. *)
137
let phrasing_only = ["span"; "strong"; "em"; "b"; "i"; "u"; "s"; "small"; "mark";
138
"abbr"; "cite"; "code"; "q"; "sub"; "sup"; "samp"; "kbd"; "var"] in
139
let flow_content = ["p"; "div"; "article"; "section"; "nav"; "aside"; "header"; "footer";
···
145
) flow_content
146
) phrasing_only
147
148
-
(** Bitmask constants for common checks. *)
149
-
let a_button_mask =
150
-
let a_num = special_ancestor_number "a" in
151
-
let button_num = special_ancestor_number "button" in
152
-
(1 lsl a_num) lor (1 lsl button_num)
153
-
154
-
let map_mask =
155
-
let map_num = special_ancestor_number "map" in
156
-
1 lsl map_num
157
158
-
(** Transparent elements - inherit content model from parent. O(1) hashtable lookup. *)
159
let transparent_elements_tbl =
160
Attr_utils.hashtbl_of_list ["a"; "canvas"; "video"; "audio"; "object"; "ins"; "del"; "map"]
161
···
163
164
(** Stack node representing an element's context. *)
165
type stack_node = {
166
-
ancestor_mask : int;
167
name : string;
168
is_transparent : bool;
169
}
···
171
(** Checker state. *)
172
type state = {
173
mutable stack : stack_node list;
174
-
mutable ancestor_mask : int;
175
}
176
177
let create () =
178
-
{ stack = []; ancestor_mask = 0 }
179
180
let reset state =
181
state.stack <- [];
182
-
state.ancestor_mask <- 0
183
184
(** Get attribute value by name from attribute list. *)
185
let get_attr attrs name =
···
192
(** Check if element is interactive based on its attributes. *)
193
let is_interactive_element name attrs =
194
match name with
195
-
| "a" ->
196
-
has_attr attrs "href"
197
-
| "audio" | "video" ->
198
-
has_attr attrs "controls"
199
-
| "img" | "object" ->
200
-
has_attr attrs "usemap"
201
| "input" ->
202
-
begin match get_attr attrs "type" with
203
-
| Some "hidden" -> false
204
-
| _ -> true
205
-
end
206
| "button" | "details" | "embed" | "iframe" | "label" | "select"
207
-
| "textarea" ->
208
-
true
209
-
| _ ->
210
-
false
211
212
-
(** Find the nearest transparent element in the ancestor stack, if any.
213
-
Returns the immediate parent's name if it's transparent, otherwise None. *)
214
let find_nearest_transparent_parent state =
215
match state.stack with
216
| parent :: _ when parent.is_transparent -> Some parent.name
···
218
219
(** Report nesting violations. *)
220
let check_nesting state name attrs collector =
221
-
(* Compute the prohibited ancestor mask for this element *)
222
-
let base_mask =
223
-
match Hashtbl.find_opt ancestor_mask_by_descendant name with
224
-
| Some m -> m
225
-
| None -> 0
226
in
227
228
-
(* Get content model violation mask for this element *)
229
-
let content_model_mask =
230
-
match Hashtbl.find_opt content_model_violation_mask name with
231
-
| Some m -> m
232
-
| None -> 0
233
in
234
235
-
(* Add interactive element restrictions if applicable *)
236
-
let mask =
237
-
if is_interactive_element name attrs then
238
-
base_mask lor a_button_mask
239
-
else
240
-
base_mask
241
in
242
243
-
(* Check for violations *)
244
-
if mask <> 0 then begin
245
-
let mask_hit = state.ancestor_mask land mask in
246
-
if mask_hit <> 0 then begin
247
-
(* Determine if element has a special attribute to mention *)
248
-
let attr =
249
-
match name with
250
-
| "a" when has_attr attrs "href" -> Some "href"
251
-
| "audio" when has_attr attrs "controls" -> Some "controls"
252
-
| "video" when has_attr attrs "controls" -> Some "controls"
253
-
| "img" when has_attr attrs "usemap" -> Some "usemap"
254
-
| "object" when has_attr attrs "usemap" -> Some "usemap"
255
-
| _ -> None
256
in
257
-
(* Find the transparent parent (like canvas) if any *)
258
-
let transparent_parent = find_nearest_transparent_parent state in
259
-
(* Find which ancestors are violated *)
260
-
Array.iteri (fun i ancestor ->
261
-
let bit = 1 lsl i in
262
-
if (mask_hit land bit) <> 0 then begin
263
-
(* Check if this is a content model violation or a nesting violation *)
264
-
if (content_model_mask land bit) <> 0 then begin
265
-
(* Content model violation: use "not allowed as child" format *)
266
-
(* If there's a transparent parent, use that instead of the ancestor *)
267
-
let parent = match transparent_parent with
268
-
| Some p -> p
269
-
| None -> ancestor
270
-
in
271
-
Message_collector.add_typed collector
272
-
(`Element (`Not_allowed_as_child (`Child name, `Parent parent)))
273
-
end else
274
-
(* Nesting violation: use "must not be descendant" format *)
275
-
Message_collector.add_typed collector
276
-
(`Element (`Must_not_descend (`Elem name, `Attr attr, `Ancestor ancestor)))
277
-
end
278
-
) special_ancestors
279
end
280
-
end
281
282
(** Check for required ancestors. *)
283
let check_required_ancestors state name collector =
284
match name with
285
| "area" ->
286
-
if (state.ancestor_mask land map_mask) = 0 then
287
Message_collector.add_typed collector
288
(`Generic (Printf.sprintf "The %s element must have a %s ancestor."
289
(Error_code.q "area") (Error_code.q "map")))
290
| _ -> ()
291
292
-
(** Check for metadata-only elements appearing outside valid contexts.
293
-
style element is only valid in head or in noscript (in head). *)
294
let check_metadata_element_context state name collector =
295
match name with
296
| "style" ->
297
-
(* style is only valid inside head or noscript *)
298
-
begin match state.stack with
299
-
| parent :: _ when parent.name = "head" -> () (* valid *)
300
-
| parent :: _ when parent.name = "noscript" -> () (* valid in noscript in head *)
301
-
| parent :: _ ->
302
-
(* style inside any other element is not allowed *)
303
-
Message_collector.add_typed collector
304
-
(`Element (`Not_allowed_as_child (`Child "style", `Parent parent.name)))
305
-
| [] -> () (* at root level, would be caught elsewhere *)
306
-
end
307
| _ -> ()
308
309
let start_element state ~element collector =
310
-
(* Only check HTML elements, not SVG or MathML *)
311
match element.Element.tag with
312
| Tag.Html _ ->
313
let name = Tag.tag_to_string element.tag in
314
let attrs = element.raw_attrs in
315
(* Check for nesting violations *)
316
check_nesting state name attrs collector;
317
check_required_ancestors state name collector;
318
check_metadata_element_context state name collector;
319
320
-
(* Update ancestor mask if this is a special ancestor *)
321
-
let new_mask = state.ancestor_mask in
322
-
let number = special_ancestor_number name in
323
-
let new_mask =
324
-
if number >= 0 then
325
-
new_mask lor (1 lsl number)
326
-
else
327
-
new_mask
328
-
in
329
330
-
(* Add href tracking for <a> elements *)
331
-
let new_mask =
332
-
if name = "a" && has_attr attrs "href" then
333
-
let a_num = special_ancestor_number "a" in
334
-
new_mask lor (1 lsl a_num)
335
-
else
336
-
new_mask
337
-
in
338
339
-
(* Push onto stack *)
340
let is_transparent = is_transparent_element name in
341
-
let node = { ancestor_mask = state.ancestor_mask; name; is_transparent } in
342
state.stack <- node :: state.stack;
343
-
state.ancestor_mask <- new_mask
344
-
| _ -> () (* SVG, MathML, Custom, Unknown *)
345
346
let end_element state ~tag _collector =
347
-
(* Only track HTML elements *)
348
match tag with
349
| Tag.Html _ ->
350
-
(* Pop from stack and restore ancestor mask *)
351
-
begin match state.stack with
352
-
| [] -> () (* Should not happen in well-formed documents *)
353
-
| node :: rest ->
354
-
state.stack <- rest;
355
-
state.ancestor_mask <- node.ancestor_mask
356
-
end
357
| _ -> ()
358
359
-
(** Create the checker as a first-class module. *)
360
let checker = Checker.make ~create ~reset ~start_element ~end_element ()
···
1
+
(** Interactive element nesting checker implementation.
2
3
+
Uses bool arrays instead of bitmasks for JavaScript compatibility
4
+
(JS bitwise ops are limited to 32 bits). *)
5
6
+
(** Special ancestors that need tracking for nesting validation. *)
7
let special_ancestors =
8
[| "a"; "address"; "body"; "button"; "caption"; "dfn"; "dt"; "figcaption";
9
"figure"; "footer"; "form"; "header"; "label"; "map"; "noscript"; "th";
···
12
"s"; "small"; "mark"; "abbr"; "cite"; "code"; "q"; "sub"; "sup"; "samp";
13
"kbd"; "var" |]
14
15
+
let num_ancestors = Array.length special_ancestors
16
+
17
+
(** Hashtable for O(1) lookup of special ancestor indices *)
18
let special_ancestor_table : (string, int) Hashtbl.t =
19
let tbl = Hashtbl.create 64 in
20
Array.iteri (fun i name -> Hashtbl.add tbl name i) special_ancestors;
21
tbl
22
23
+
(** Get the index for a special ancestor element.
24
+
Returns [-1] if the element is not a special ancestor. *)
25
+
let special_ancestor_index name =
26
match Hashtbl.find_opt special_ancestor_table name with
27
| Some i -> i
28
| None -> -1
···
32
[| "a"; "button"; "details"; "embed"; "iframe"; "label"; "select";
33
"textarea" |]
34
35
+
(** Create an empty bool array for ancestor tracking *)
36
+
let empty_flags () = Array.make num_ancestors false
37
+
38
+
(** Copy a bool array *)
39
+
let copy_flags flags = Array.copy flags
40
+
41
+
(** Map from descendant element name to prohibited ancestor flags. *)
42
+
let prohibited_ancestors_by_descendant : (string, bool array) Hashtbl.t =
43
Hashtbl.create 64
44
45
+
(** Map from descendant element name to content model violation flags. *)
46
+
let content_model_violations : (string, bool array) Hashtbl.t =
47
Hashtbl.create 64
48
49
+
(** Get or create prohibited ancestors array for a descendant *)
50
+
let get_prohibited descendant =
51
+
match Hashtbl.find_opt prohibited_ancestors_by_descendant descendant with
52
+
| Some arr -> arr
53
+
| None ->
54
+
let arr = empty_flags () in
55
+
Hashtbl.replace prohibited_ancestors_by_descendant descendant arr;
56
+
arr
57
+
58
+
(** Get or create content model violations array for a descendant *)
59
+
let get_content_model_violations descendant =
60
+
match Hashtbl.find_opt content_model_violations descendant with
61
+
| Some arr -> arr
62
+
| None ->
63
+
let arr = empty_flags () in
64
+
Hashtbl.replace content_model_violations descendant arr;
65
+
arr
66
+
67
(** Register that [ancestor] is prohibited for [descendant]. *)
68
let register_prohibited_ancestor ancestor descendant =
69
+
let idx = special_ancestor_index ancestor in
70
+
if idx = -1 then
71
failwith ("Ancestor not found in array: " ^ ancestor);
72
+
let arr = get_prohibited descendant in
73
+
arr.(idx) <- true
74
75
(** Register a content model violation (phrasing-only element containing flow content). *)
76
let register_content_model_violation ancestor descendant =
77
register_prohibited_ancestor ancestor descendant;
78
+
let idx = special_ancestor_index ancestor in
79
+
let arr = get_content_model_violations descendant in
80
+
arr.(idx) <- true
81
82
(** Initialize the prohibited ancestor map. *)
83
let () =
···
147
) interactive_elements;
148
149
(* Phrasing-only elements: cannot contain flow content like p, div, h1-h6, etc. *)
150
let phrasing_only = ["span"; "strong"; "em"; "b"; "i"; "u"; "s"; "small"; "mark";
151
"abbr"; "cite"; "code"; "q"; "sub"; "sup"; "samp"; "kbd"; "var"] in
152
let flow_content = ["p"; "div"; "article"; "section"; "nav"; "aside"; "header"; "footer";
···
158
) flow_content
159
) phrasing_only
160
161
+
(** Indices for common checks *)
162
+
let a_index = special_ancestor_index "a"
163
+
let button_index = special_ancestor_index "button"
164
+
let map_index = special_ancestor_index "map"
165
166
+
(** Transparent elements - inherit content model from parent. *)
167
let transparent_elements_tbl =
168
Attr_utils.hashtbl_of_list ["a"; "canvas"; "video"; "audio"; "object"; "ins"; "del"; "map"]
169
···
171
172
(** Stack node representing an element's context. *)
173
type stack_node = {
174
+
ancestor_flags : bool array;
175
name : string;
176
is_transparent : bool;
177
}
···
179
(** Checker state. *)
180
type state = {
181
mutable stack : stack_node list;
182
+
mutable ancestor_flags : bool array;
183
}
184
185
let create () =
186
+
{ stack = []; ancestor_flags = empty_flags () }
187
188
let reset state =
189
state.stack <- [];
190
+
state.ancestor_flags <- empty_flags ()
191
192
(** Get attribute value by name from attribute list. *)
193
let get_attr attrs name =
···
200
(** Check if element is interactive based on its attributes. *)
201
let is_interactive_element name attrs =
202
match name with
203
+
| "a" -> has_attr attrs "href"
204
+
| "audio" | "video" -> has_attr attrs "controls"
205
+
| "img" | "object" -> has_attr attrs "usemap"
206
| "input" ->
207
+
(match get_attr attrs "type" with
208
+
| Some "hidden" -> false
209
+
| _ -> true)
210
| "button" | "details" | "embed" | "iframe" | "label" | "select"
211
+
| "textarea" -> true
212
+
| _ -> false
213
214
+
(** Find the nearest transparent element in the ancestor stack. *)
215
let find_nearest_transparent_parent state =
216
match state.stack with
217
| parent :: _ when parent.is_transparent -> Some parent.name
···
219
220
(** Report nesting violations. *)
221
let check_nesting state name attrs collector =
222
+
(* Get prohibited ancestors for this element *)
223
+
let prohibited =
224
+
match Hashtbl.find_opt prohibited_ancestors_by_descendant name with
225
+
| Some arr -> arr
226
+
| None -> empty_flags ()
227
in
228
229
+
(* Get content model violations for this element *)
230
+
let content_violations =
231
+
match Hashtbl.find_opt content_model_violations name with
232
+
| Some arr -> arr
233
+
| None -> empty_flags ()
234
in
235
236
+
(* Check if element is interactive (adds a/button restrictions) *)
237
+
let is_interactive = is_interactive_element name attrs in
238
+
239
+
(* Determine attribute to mention in error messages *)
240
+
let attr =
241
+
match name with
242
+
| "a" when has_attr attrs "href" -> Some "href"
243
+
| "audio" when has_attr attrs "controls" -> Some "controls"
244
+
| "video" when has_attr attrs "controls" -> Some "controls"
245
+
| "img" when has_attr attrs "usemap" -> Some "usemap"
246
+
| "object" when has_attr attrs "usemap" -> Some "usemap"
247
+
| _ -> None
248
in
249
250
+
(* Find transparent parent if any *)
251
+
let transparent_parent = find_nearest_transparent_parent state in
252
+
253
+
(* Check each special ancestor *)
254
+
Array.iteri (fun i ancestor ->
255
+
(* Is this ancestor in our current ancestor chain? *)
256
+
if state.ancestor_flags.(i) then begin
257
+
(* Is this ancestor prohibited for this element? *)
258
+
let is_prohibited =
259
+
prohibited.(i) ||
260
+
(is_interactive && (i = a_index || i = button_index))
261
in
262
+
if is_prohibited then begin
263
+
(* Is this a content model violation or a nesting violation? *)
264
+
if content_violations.(i) then begin
265
+
(* Content model violation: use "not allowed as child" format *)
266
+
let parent = match transparent_parent with
267
+
| Some p -> p
268
+
| None -> ancestor
269
+
in
270
+
Message_collector.add_typed collector
271
+
(`Element (`Not_allowed_as_child (`Child name, `Parent parent)))
272
+
end else
273
+
(* Nesting violation: use "must not be descendant" format *)
274
+
Message_collector.add_typed collector
275
+
(`Element (`Must_not_descend (`Elem name, `Attr attr, `Ancestor ancestor)))
276
+
end
277
end
278
+
) special_ancestors
279
280
(** Check for required ancestors. *)
281
let check_required_ancestors state name collector =
282
match name with
283
| "area" ->
284
+
if not state.ancestor_flags.(map_index) then
285
Message_collector.add_typed collector
286
(`Generic (Printf.sprintf "The %s element must have a %s ancestor."
287
(Error_code.q "area") (Error_code.q "map")))
288
| _ -> ()
289
290
+
(** Check for metadata-only elements appearing outside valid contexts. *)
291
let check_metadata_element_context state name collector =
292
match name with
293
| "style" ->
294
+
(match state.stack with
295
+
| parent :: _ when parent.name = "head" -> ()
296
+
| parent :: _ when parent.name = "noscript" -> ()
297
+
| parent :: _ ->
298
+
Message_collector.add_typed collector
299
+
(`Element (`Not_allowed_as_child (`Child "style", `Parent parent.name)))
300
+
| [] -> ())
301
| _ -> ()
302
303
let start_element state ~element collector =
304
match element.Element.tag with
305
| Tag.Html _ ->
306
let name = Tag.tag_to_string element.tag in
307
let attrs = element.raw_attrs in
308
+
309
(* Check for nesting violations *)
310
check_nesting state name attrs collector;
311
check_required_ancestors state name collector;
312
check_metadata_element_context state name collector;
313
314
+
(* Create new flags, copying current state *)
315
+
let new_flags = copy_flags state.ancestor_flags in
316
317
+
(* Set flag if this is a special ancestor *)
318
+
let idx = special_ancestor_index name in
319
+
if idx >= 0 then
320
+
new_flags.(idx) <- true;
321
322
+
(* Push onto stack (save old flags) *)
323
let is_transparent = is_transparent_element name in
324
+
let node = { ancestor_flags = state.ancestor_flags; name; is_transparent } in
325
state.stack <- node :: state.stack;
326
+
state.ancestor_flags <- new_flags
327
+
| _ -> ()
328
329
let end_element state ~tag _collector =
330
match tag with
331
| Tag.Html _ ->
332
+
(match state.stack with
333
+
| [] -> ()
334
+
| node :: rest ->
335
+
state.stack <- rest;
336
+
state.ancestor_flags <- node.ancestor_flags)
337
| _ -> ()
338
339
let checker = Checker.make ~create ~reset ~start_element ~end_element ()