+12
-2
lib/check/specialized/title_checker.ml
+12
-2
lib/check/specialized/title_checker.ml
···
2
3
type state = {
4
mutable in_head : bool;
5
mutable has_title : bool;
6
mutable in_title : bool;
7
mutable title_has_content : bool;
···
10
11
let create () = {
12
in_head = false;
13
has_title = false;
14
in_title = false;
15
title_has_content = false;
···
18
19
let reset state =
20
state.in_head <- false;
21
state.has_title <- false;
22
state.in_title <- false;
23
state.title_has_content <- false;
···
27
(match element.Element.tag with
28
| Tag.Html `Html -> ()
29
| Tag.Html `Head ->
30
-
state.in_head <- true
31
| Tag.Html `Title when state.in_head ->
32
state.has_title <- true;
33
state.in_title <- true;
34
state.title_has_content <- false;
35
state.title_depth <- 0
36
| _ -> ());
37
if state.in_title then
38
state.title_depth <- state.title_depth + 1
···
47
(`Element (`Must_not_be_empty (`Elem "title")));
48
state.in_title <- false
49
| Tag.Html `Head ->
50
-
if state.in_head && not state.has_title then
51
Message_collector.add_typed collector
52
(`Element (`Missing_child (`Parent "head", `Child "title")));
53
state.in_head <- false
···
2
3
type state = {
4
mutable in_head : bool;
5
+
mutable head_had_children : bool; (* true if head contained any child elements *)
6
mutable has_title : bool;
7
mutable in_title : bool;
8
mutable title_has_content : bool;
···
11
12
let create () = {
13
in_head = false;
14
+
head_had_children = false;
15
has_title = false;
16
in_title = false;
17
title_has_content = false;
···
20
21
let reset state =
22
state.in_head <- false;
23
+
state.head_had_children <- false;
24
state.has_title <- false;
25
state.in_title <- false;
26
state.title_has_content <- false;
···
30
(match element.Element.tag with
31
| Tag.Html `Html -> ()
32
| Tag.Html `Head ->
33
+
state.in_head <- true;
34
+
state.head_had_children <- false
35
| Tag.Html `Title when state.in_head ->
36
+
state.head_had_children <- true;
37
state.has_title <- true;
38
state.in_title <- true;
39
state.title_has_content <- false;
40
state.title_depth <- 0
41
+
| _ when state.in_head ->
42
+
(* Any element inside head means head had children *)
43
+
state.head_had_children <- true
44
| _ -> ());
45
if state.in_title then
46
state.title_depth <- state.title_depth + 1
···
55
(`Element (`Must_not_be_empty (`Elem "title")));
56
state.in_title <- false
57
| Tag.Html `Head ->
58
+
(* Only report missing title if head had children (was explicit with content).
59
+
An empty head was likely implicit (fragment validation from body). *)
60
+
if state.in_head && not state.has_title && state.head_had_children then
61
Message_collector.add_typed collector
62
(`Element (`Missing_child (`Parent "head", `Child "title")));
63
state.in_head <- false
+16
-4
lib/js/htmlrw_js_dom.ml
+16
-4
lib/js/htmlrw_js_dom.ml
···
81
82
(* Build the location map by matching elements *)
83
let loc_to_el =
84
let rec match_elements loc_map browser_els html5rw_els =
85
match browser_els, html5rw_els with
86
| [], _ | _, [] -> loc_map
···
96
in
97
match_elements loc_map b_rest h_rest
98
else
99
-
(* Tags don't match - try to resync by skipping one side *)
100
-
(* This handles cases where browser might have implicit elements *)
101
-
match_elements loc_map b_rest html5rw_els
102
in
103
-
match_elements LocMap.empty browser_elements html5rw_elements
104
in
105
106
{ root; html_source = html; loc_to_el }, html
···
81
82
(* Build the location map by matching elements *)
83
let loc_to_el =
84
+
(* Find the starting point in parsed elements that matches the root tag *)
85
+
let root_tag = String.lowercase_ascii (Jstr.to_string (El.tag_name root)) in
86
+
let rec find_start = function
87
+
| [] -> []
88
+
| h_el :: rest ->
89
+
if String.lowercase_ascii h_el.Html5rw.Dom.name = root_tag then
90
+
h_el :: rest
91
+
else
92
+
find_start rest
93
+
in
94
+
let html5rw_elements_aligned = find_start html5rw_elements in
95
+
96
let rec match_elements loc_map browser_els html5rw_els =
97
match browser_els, html5rw_els with
98
| [], _ | _, [] -> loc_map
···
108
in
109
match_elements loc_map b_rest h_rest
110
else
111
+
(* Tags don't match - try skipping the parsed element first *)
112
+
(* This handles cases where parser creates implicit elements *)
113
+
match_elements loc_map browser_els h_rest
114
in
115
+
match_elements LocMap.empty browser_elements html5rw_elements_aligned
116
in
117
118
{ root; html_source = html; loc_to_el }, html