Serenity Operating System
1/*
2 * Copyright (c) 2018-2022, Andreas Kling <kling@serenityos.org>
3 * Copyright (c) 2021-2023, Sam Atkins <atkinssj@serenityos.org>
4 *
5 * SPDX-License-Identifier: BSD-2-Clause
6 */
7
8#include <LibWeb/CSS/Parser/Parser.h>
9#include <LibWeb/CSS/SelectorEngine.h>
10#include <LibWeb/DOM/Document.h>
11#include <LibWeb/DOM/Element.h>
12#include <LibWeb/DOM/Text.h>
13#include <LibWeb/HTML/AttributeNames.h>
14#include <LibWeb/HTML/HTMLAnchorElement.h>
15#include <LibWeb/HTML/HTMLAreaElement.h>
16#include <LibWeb/HTML/HTMLButtonElement.h>
17#include <LibWeb/HTML/HTMLFieldSetElement.h>
18#include <LibWeb/HTML/HTMLHtmlElement.h>
19#include <LibWeb/HTML/HTMLInputElement.h>
20#include <LibWeb/HTML/HTMLOptGroupElement.h>
21#include <LibWeb/HTML/HTMLOptionElement.h>
22#include <LibWeb/HTML/HTMLSelectElement.h>
23#include <LibWeb/HTML/HTMLTextAreaElement.h>
24#include <LibWeb/Infra/Strings.h>
25
26namespace Web::SelectorEngine {
27
28// https://drafts.csswg.org/selectors-4/#the-lang-pseudo
29static inline bool matches_lang_pseudo_class(DOM::Element const& element, Vector<FlyString> const& languages)
30{
31 FlyString element_language;
32 for (auto const* e = &element; e; e = e->parent_element()) {
33 auto lang = e->attribute(HTML::AttributeNames::lang);
34 if (!lang.is_null()) {
35 element_language = FlyString::from_deprecated_fly_string(lang).release_value_but_fixme_should_propagate_errors();
36 break;
37 }
38 }
39 if (element_language.is_empty())
40 return false;
41
42 // FIXME: This is ad-hoc. Implement a proper language range matching algorithm as recommended by BCP47.
43 for (auto const& language : languages) {
44 if (language.is_empty())
45 return false;
46 if (language == "*"sv)
47 return true;
48 if (!element_language.to_string().contains('-'))
49 return Infra::is_ascii_case_insensitive_match(element_language, language);
50 auto parts = element_language.to_string().split_limit('-', 2).release_value_but_fixme_should_propagate_errors();
51 return Infra::is_ascii_case_insensitive_match(parts[0], language);
52 }
53 return false;
54}
55
56// https://html.spec.whatwg.org/multipage/semantics-other.html#selector-link
57static inline bool matches_link_pseudo_class(DOM::Element const& element)
58{
59 // All a elements that have an href attribute, and all area elements that have an href attribute, must match one of :link and :visited.
60 if (!is<HTML::HTMLAnchorElement>(element) && !is<HTML::HTMLAreaElement>(element))
61 return false;
62 return element.has_attribute(HTML::AttributeNames::href);
63}
64
65static inline bool matches_hover_pseudo_class(DOM::Element const& element)
66{
67 auto* hovered_node = element.document().hovered_node();
68 if (!hovered_node)
69 return false;
70 if (&element == hovered_node)
71 return true;
72 return element.is_ancestor_of(*hovered_node);
73}
74
75// https://html.spec.whatwg.org/multipage/semantics-other.html#selector-checked
76static inline bool matches_checked_pseudo_class(DOM::Element const& element)
77{
78 // The :checked pseudo-class must match any element falling into one of the following categories:
79 // - input elements whose type attribute is in the Checkbox state and whose checkedness state is true
80 // - input elements whose type attribute is in the Radio Button state and whose checkedness state is true
81 if (is<HTML::HTMLInputElement>(element)) {
82 auto const& input_element = static_cast<HTML::HTMLInputElement const&>(element);
83 switch (input_element.type_state()) {
84 case HTML::HTMLInputElement::TypeAttributeState::Checkbox:
85 case HTML::HTMLInputElement::TypeAttributeState::RadioButton:
86 return static_cast<HTML::HTMLInputElement const&>(element).checked();
87 default:
88 return false;
89 }
90 }
91
92 // FIXME: - option elements whose selectedness is true
93
94 return false;
95}
96
97static inline bool matches_attribute(CSS::Selector::SimpleSelector::Attribute const& attribute, DOM::Element const& element)
98{
99 if (attribute.match_type == CSS::Selector::SimpleSelector::Attribute::MatchType::HasAttribute) {
100 // Early way out in case of an attribute existence selector.
101 return element.has_attribute(attribute.name.to_string().to_deprecated_string());
102 }
103
104 auto const case_insensitive_match = (attribute.case_type == CSS::Selector::SimpleSelector::Attribute::CaseType::CaseInsensitiveMatch);
105 auto const case_sensitivity = case_insensitive_match
106 ? CaseSensitivity::CaseInsensitive
107 : CaseSensitivity::CaseSensitive;
108
109 switch (attribute.match_type) {
110 case CSS::Selector::SimpleSelector::Attribute::MatchType::ExactValueMatch:
111 return case_insensitive_match
112 ? Infra::is_ascii_case_insensitive_match(element.attribute(attribute.name.to_string().to_deprecated_string()), attribute.value)
113 : element.attribute(attribute.name.to_string().to_deprecated_string()) == attribute.value.to_deprecated_string();
114 case CSS::Selector::SimpleSelector::Attribute::MatchType::ContainsWord: {
115 if (attribute.value.is_empty()) {
116 // This selector is always false is match value is empty.
117 return false;
118 }
119 auto const view = element.attribute(attribute.name.to_string().to_deprecated_string()).split_view(' ');
120 auto const size = view.size();
121 for (size_t i = 0; i < size; ++i) {
122 auto const value = view.at(i);
123 if (case_insensitive_match
124 ? Infra::is_ascii_case_insensitive_match(value, attribute.value)
125 : value == attribute.value) {
126 return true;
127 }
128 }
129 return false;
130 }
131 case CSS::Selector::SimpleSelector::Attribute::MatchType::ContainsString:
132 return !attribute.value.is_empty()
133 && element.attribute(attribute.name.to_string().to_deprecated_string()).contains(attribute.value, case_sensitivity);
134 case CSS::Selector::SimpleSelector::Attribute::MatchType::StartsWithSegment: {
135 auto const element_attr_value = element.attribute(attribute.name.to_string().to_deprecated_string());
136 if (element_attr_value.is_empty()) {
137 // If the attribute value on element is empty, the selector is true
138 // if the match value is also empty and false otherwise.
139 return attribute.value.is_empty();
140 }
141 if (attribute.value.is_empty()) {
142 return false;
143 }
144 auto segments = element_attr_value.split_view('-');
145 return case_insensitive_match
146 ? Infra::is_ascii_case_insensitive_match(segments.first(), attribute.value)
147 : segments.first() == attribute.value;
148 }
149 case CSS::Selector::SimpleSelector::Attribute::MatchType::StartsWithString:
150 return !attribute.value.is_empty()
151 && element.attribute(attribute.name.to_string().to_deprecated_string()).starts_with(attribute.value, case_sensitivity);
152 case CSS::Selector::SimpleSelector::Attribute::MatchType::EndsWithString:
153 return !attribute.value.is_empty()
154 && element.attribute(attribute.name.to_string().to_deprecated_string()).ends_with(attribute.value, case_sensitivity);
155 default:
156 break;
157 }
158
159 return false;
160}
161
162static inline DOM::Element const* previous_sibling_with_same_tag_name(DOM::Element const& element)
163{
164 for (auto const* sibling = element.previous_element_sibling(); sibling; sibling = sibling->previous_element_sibling()) {
165 if (sibling->tag_name() == element.tag_name())
166 return sibling;
167 }
168 return nullptr;
169}
170
171static inline DOM::Element const* next_sibling_with_same_tag_name(DOM::Element const& element)
172{
173 for (auto const* sibling = element.next_element_sibling(); sibling; sibling = sibling->next_element_sibling()) {
174 if (sibling->tag_name() == element.tag_name())
175 return sibling;
176 }
177 return nullptr;
178}
179
180static inline bool matches_pseudo_class(CSS::Selector::SimpleSelector::PseudoClass const& pseudo_class, DOM::Element const& element)
181{
182 switch (pseudo_class.type) {
183 case CSS::Selector::SimpleSelector::PseudoClass::Type::Link:
184 return matches_link_pseudo_class(element);
185 case CSS::Selector::SimpleSelector::PseudoClass::Type::Visited:
186 // FIXME: Maybe match this selector sometimes?
187 return false;
188 case CSS::Selector::SimpleSelector::PseudoClass::Type::Active:
189 return element.is_active();
190 case CSS::Selector::SimpleSelector::PseudoClass::Type::Hover:
191 return matches_hover_pseudo_class(element);
192 case CSS::Selector::SimpleSelector::PseudoClass::Type::Focus:
193 return element.is_focused();
194 case CSS::Selector::SimpleSelector::PseudoClass::Type::FocusWithin: {
195 auto* focused_element = element.document().focused_element();
196 return focused_element && element.is_inclusive_ancestor_of(*focused_element);
197 }
198 case CSS::Selector::SimpleSelector::PseudoClass::Type::FirstChild:
199 return !element.previous_element_sibling();
200 case CSS::Selector::SimpleSelector::PseudoClass::Type::LastChild:
201 return !element.next_element_sibling();
202 case CSS::Selector::SimpleSelector::PseudoClass::Type::OnlyChild:
203 return !(element.previous_element_sibling() || element.next_element_sibling());
204 case CSS::Selector::SimpleSelector::PseudoClass::Type::Empty: {
205 if (!element.has_children())
206 return true;
207 if (element.first_child_of_type<DOM::Element>())
208 return false;
209 // NOTE: CSS Selectors level 4 changed ":empty" to also match whitespace-only text nodes.
210 // However, none of the major browser supports this yet, so let's just hang back until they do.
211 bool has_nonempty_text_child = false;
212 element.for_each_child_of_type<DOM::Text>([&](auto const& text_child) {
213 if (!text_child.data().is_empty()) {
214 has_nonempty_text_child = true;
215 return IterationDecision::Break;
216 }
217 return IterationDecision::Continue;
218 });
219 return !has_nonempty_text_child;
220 }
221 case CSS::Selector::SimpleSelector::PseudoClass::Type::Root:
222 return is<HTML::HTMLHtmlElement>(element);
223 case CSS::Selector::SimpleSelector::PseudoClass::Type::FirstOfType:
224 return !previous_sibling_with_same_tag_name(element);
225 case CSS::Selector::SimpleSelector::PseudoClass::Type::LastOfType:
226 return !next_sibling_with_same_tag_name(element);
227 case CSS::Selector::SimpleSelector::PseudoClass::Type::OnlyOfType:
228 return !previous_sibling_with_same_tag_name(element) && !next_sibling_with_same_tag_name(element);
229 case CSS::Selector::SimpleSelector::PseudoClass::Type::Lang:
230 return matches_lang_pseudo_class(element, pseudo_class.languages);
231 case CSS::Selector::SimpleSelector::PseudoClass::Type::Disabled:
232 // https://html.spec.whatwg.org/multipage/semantics-other.html#selector-disabled
233 // The :disabled pseudo-class must match any element that is actually disabled.
234 return element.is_actually_disabled();
235 case CSS::Selector::SimpleSelector::PseudoClass::Type::Enabled:
236 // https://html.spec.whatwg.org/multipage/semantics-other.html#selector-enabled
237 // The :enabled pseudo-class must match any button, input, select, textarea, optgroup, option, fieldset element, or form-associated custom element that is not actually disabled.
238 return (is<HTML::HTMLButtonElement>(element) || is<HTML::HTMLInputElement>(element) || is<HTML::HTMLSelectElement>(element) || is<HTML::HTMLTextAreaElement>(element) || is<HTML::HTMLOptGroupElement>(element) || is<HTML::HTMLOptionElement>(element) || is<HTML::HTMLFieldSetElement>(element))
239 && !element.is_actually_disabled();
240 case CSS::Selector::SimpleSelector::PseudoClass::Type::Checked:
241 return matches_checked_pseudo_class(element);
242 case CSS::Selector::SimpleSelector::PseudoClass::Type::Is:
243 case CSS::Selector::SimpleSelector::PseudoClass::Type::Where:
244 for (auto& selector : pseudo_class.argument_selector_list) {
245 if (matches(selector, element))
246 return true;
247 }
248 return false;
249 case CSS::Selector::SimpleSelector::PseudoClass::Type::Not:
250 for (auto& selector : pseudo_class.argument_selector_list) {
251 if (matches(selector, element))
252 return false;
253 }
254 return true;
255 case CSS::Selector::SimpleSelector::PseudoClass::Type::NthChild:
256 case CSS::Selector::SimpleSelector::PseudoClass::Type::NthLastChild:
257 case CSS::Selector::SimpleSelector::PseudoClass::Type::NthOfType:
258 case CSS::Selector::SimpleSelector::PseudoClass::Type::NthLastOfType:
259 auto const step_size = pseudo_class.nth_child_pattern.step_size;
260 auto const offset = pseudo_class.nth_child_pattern.offset;
261 if (step_size == 0 && offset == 0)
262 return false; // "If both a and b are equal to zero, the pseudo-class represents no element in the document tree."
263
264 auto const* parent = element.parent_element();
265 if (!parent)
266 return false;
267
268 auto matches_selector_list = [](CSS::SelectorList const& list, DOM::Element const& element) {
269 if (list.is_empty())
270 return true;
271 for (auto const& child_selector : list) {
272 if (matches(child_selector, element)) {
273 return true;
274 }
275 }
276 return false;
277 };
278
279 int index = 1;
280 switch (pseudo_class.type) {
281 case CSS::Selector::SimpleSelector::PseudoClass::Type::NthChild: {
282 if (!matches_selector_list(pseudo_class.argument_selector_list, element))
283 return false;
284 for (auto* child = parent->first_child_of_type<DOM::Element>(); child && child != &element; child = child->next_element_sibling()) {
285 if (matches_selector_list(pseudo_class.argument_selector_list, *child))
286 ++index;
287 }
288 break;
289 }
290 case CSS::Selector::SimpleSelector::PseudoClass::Type::NthLastChild: {
291 if (!matches_selector_list(pseudo_class.argument_selector_list, element))
292 return false;
293 for (auto* child = parent->last_child_of_type<DOM::Element>(); child && child != &element; child = child->previous_element_sibling()) {
294 if (matches_selector_list(pseudo_class.argument_selector_list, *child))
295 ++index;
296 }
297 break;
298 }
299 case CSS::Selector::SimpleSelector::PseudoClass::Type::NthOfType: {
300 for (auto* child = previous_sibling_with_same_tag_name(element); child; child = previous_sibling_with_same_tag_name(*child))
301 ++index;
302 break;
303 }
304 case CSS::Selector::SimpleSelector::PseudoClass::Type::NthLastOfType: {
305 for (auto* child = next_sibling_with_same_tag_name(element); child; child = next_sibling_with_same_tag_name(*child))
306 ++index;
307 break;
308 }
309 default:
310 VERIFY_NOT_REACHED();
311 }
312
313 // When "step_size == -1", selector represents first "offset" elements in document tree.
314 if (step_size == -1)
315 return !(offset <= 0 || index > offset);
316
317 // When "step_size == 1", selector represents last "offset" elements in document tree.
318 if (step_size == 1)
319 return !(offset < 0 || index < offset);
320
321 // When "step_size == 0", selector picks only the "offset" element.
322 if (step_size == 0)
323 return index == offset;
324
325 // If both are negative, nothing can match.
326 if (step_size < 0 && offset < 0)
327 return false;
328
329 // Like "a % b", but handles negative integers correctly.
330 auto const canonical_modulo = [](int a, int b) -> int {
331 int c = a % b;
332 if ((c < 0 && b > 0) || (c > 0 && b < 0)) {
333 c += b;
334 }
335 return c;
336 };
337
338 // When "step_size < 0", we start at "offset" and count backwards.
339 if (step_size < 0)
340 return index <= offset && canonical_modulo(index - offset, -step_size) == 0;
341
342 // Otherwise, we start at "offset" and count forwards.
343 return index >= offset && canonical_modulo(index - offset, step_size) == 0;
344 }
345
346 return false;
347}
348
349static inline bool matches(CSS::Selector::SimpleSelector const& component, DOM::Element const& element)
350{
351 switch (component.type) {
352 case CSS::Selector::SimpleSelector::Type::Universal:
353 return true;
354 case CSS::Selector::SimpleSelector::Type::Id:
355 return component.name() == element.attribute(HTML::AttributeNames::id).view();
356 case CSS::Selector::SimpleSelector::Type::Class:
357 return element.has_class(component.name());
358 case CSS::Selector::SimpleSelector::Type::TagName:
359 // See https://html.spec.whatwg.org/multipage/semantics-other.html#case-sensitivity-of-selectors
360 if (element.document().document_type() == DOM::Document::Type::HTML)
361 return component.lowercase_name() == element.local_name().view();
362 return Infra::is_ascii_case_insensitive_match(component.name(), element.local_name());
363 case CSS::Selector::SimpleSelector::Type::Attribute:
364 return matches_attribute(component.attribute(), element);
365 case CSS::Selector::SimpleSelector::Type::PseudoClass:
366 return matches_pseudo_class(component.pseudo_class(), element);
367 case CSS::Selector::SimpleSelector::Type::PseudoElement:
368 // Pseudo-element matching/not-matching is handled in the top level matches().
369 return true;
370 default:
371 VERIFY_NOT_REACHED();
372 }
373}
374
375static inline bool matches(CSS::Selector const& selector, int component_list_index, DOM::Element const& element)
376{
377 auto& relative_selector = selector.compound_selectors()[component_list_index];
378 for (auto& simple_selector : relative_selector.simple_selectors) {
379 if (!matches(simple_selector, element))
380 return false;
381 }
382 switch (relative_selector.combinator) {
383 case CSS::Selector::Combinator::None:
384 return true;
385 case CSS::Selector::Combinator::Descendant:
386 VERIFY(component_list_index != 0);
387 for (auto* ancestor = element.parent(); ancestor; ancestor = ancestor->parent()) {
388 if (!is<DOM::Element>(*ancestor))
389 continue;
390 if (matches(selector, component_list_index - 1, static_cast<DOM::Element const&>(*ancestor)))
391 return true;
392 }
393 return false;
394 case CSS::Selector::Combinator::ImmediateChild:
395 VERIFY(component_list_index != 0);
396 if (!element.parent() || !is<DOM::Element>(*element.parent()))
397 return false;
398 return matches(selector, component_list_index - 1, static_cast<DOM::Element const&>(*element.parent()));
399 case CSS::Selector::Combinator::NextSibling:
400 VERIFY(component_list_index != 0);
401 if (auto* sibling = element.previous_element_sibling())
402 return matches(selector, component_list_index - 1, *sibling);
403 return false;
404 case CSS::Selector::Combinator::SubsequentSibling:
405 VERIFY(component_list_index != 0);
406 for (auto* sibling = element.previous_element_sibling(); sibling; sibling = sibling->previous_element_sibling()) {
407 if (matches(selector, component_list_index - 1, *sibling))
408 return true;
409 }
410 return false;
411 case CSS::Selector::Combinator::Column:
412 TODO();
413 }
414 VERIFY_NOT_REACHED();
415}
416
417bool matches(CSS::Selector const& selector, DOM::Element const& element, Optional<CSS::Selector::PseudoElement> pseudo_element)
418{
419 VERIFY(!selector.compound_selectors().is_empty());
420 if (pseudo_element.has_value() && selector.pseudo_element() != pseudo_element)
421 return false;
422 if (!pseudo_element.has_value() && selector.pseudo_element().has_value())
423 return false;
424 return matches(selector, selector.compound_selectors().size() - 1, element);
425}
426
427}