Serenity Operating System
1/*
2 * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice, this
9 * list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <AK/HashMap.h>
28#include <LibHTML/CSS/PropertyID.h>
29#include <LibHTML/CSS/StyleSheet.h>
30#include <LibHTML/Parser/CSSParser.h>
31#include <ctype.h>
32#include <stdio.h>
33#include <stdlib.h>
34
// Parser-internal assertion. When `x` is false it logs the failed expression
// together with the current parse position and the whole input, then aborts via
// ASSERT_NOT_REACHED(). It must be expanded where `index` and `css` are in scope.
//
// The body is wrapped in do { } while (0) so the macro behaves like a single
// statement: `PARSE_ASSERT(x);` is safe inside an unbraced if/else.
#define PARSE_ASSERT(x)                                                       \
    do {                                                                      \
        if (!(x)) {                                                           \
            dbg() << "CSS PARSER ASSERTION FAILED: " << #x;                   \
            dbg() << "At character# " << index << " in CSS: _" << css << "_"; \
            ASSERT_NOT_REACHED();                                             \
        }                                                                     \
    } while (0)
41
42static Optional<Color> parse_css_color(const StringView& view)
43{
44 auto color = Color::from_string(view);
45 if (color.has_value())
46 return color;
47
48 return {};
49}
50
51static Optional<float> try_parse_float(const StringView& string)
52{
53 const char* str = string.characters_without_null_termination();
54 size_t len = string.length();
55 size_t weight = 1;
56 int exp_val = 0;
57 float value = 0.0f;
58 float fraction = 0.0f;
59 bool has_sign = false;
60 bool is_negative = false;
61 bool is_fractional = false;
62 bool is_scientific = false;
63
64 if (str[0] == '-') {
65 is_negative = true;
66 has_sign = true;
67 }
68 if (str[0] == '+') {
69 has_sign = true;
70 }
71
72 for (size_t i = has_sign; i < len; i++) {
73
74 // Looks like we're about to start working on the fractional part
75 if (str[i] == '.') {
76 is_fractional = true;
77 continue;
78 }
79
80 if (str[i] == 'e' || str[i] == 'E') {
81 if (str[i + 1] == '-' || str[i + 1] == '+')
82 exp_val = atoi(str + i + 2);
83 else
84 exp_val = atoi(str + i + 1);
85
86 is_scientific = true;
87 continue;
88 }
89
90 if (str[i] < '0' || str[i] > '9' || exp_val != 0) {
91 return {};
92 continue;
93 }
94
95 if (is_fractional) {
96 fraction *= 10;
97 fraction += str[i] - '0';
98 weight *= 10;
99 } else {
100 value = value * 10;
101 value += str[i] - '0';
102 }
103 }
104
105 fraction /= weight;
106 value += fraction;
107
108 if (is_scientific) {
109 bool divide = exp_val < 0;
110 if (divide)
111 exp_val *= -1;
112
113 for (int i = 0; i < exp_val; i++) {
114 if (divide)
115 value /= 10;
116 else
117 value *= 10;
118 }
119 }
120
121 return is_negative ? -value : value;
122}
123
124static Optional<float> parse_number(const StringView& view)
125{
126 if (view.length() >= 2 && view[view.length() - 2] == 'p' && view[view.length() - 1] == 'x')
127 return parse_number(view.substring_view(0, view.length() - 2));
128
129 return try_parse_float(view);
130}
131
132NonnullRefPtr<StyleValue> parse_css_value(const StringView& string)
133{
134 auto number = parse_number(string);
135 if (number.has_value())
136 return LengthStyleValue::create(Length(number.value(), Length::Type::Absolute));
137 if (string == "inherit")
138 return InheritStyleValue::create();
139 if (string == "initial")
140 return InitialStyleValue::create();
141 if (string == "auto")
142 return LengthStyleValue::create(Length());
143
144 auto color = parse_css_color(string);
145 if (color.has_value())
146 return ColorStyleValue::create(color.value());
147
148 if (string == "-libhtml-link")
149 return IdentifierStyleValue::create(CSS::ValueID::VendorSpecificLink);
150
151 return StringStyleValue::create(string);
152}
153
154class CSSParser {
155public:
156 CSSParser(const StringView& input)
157 : css(input)
158 {
159 }
160
161 bool next_is(const char* str) const
162 {
163 size_t len = strlen(str);
164 for (size_t i = 0; i < len; ++i) {
165 if (peek(i) != str[i])
166 return false;
167 }
168 return true;
169 }
170
171 char peek(size_t offset = 0) const
172 {
173 if ((index + offset) < css.length())
174 return css[index + offset];
175 return 0;
176 }
177
178 char consume_specific(char ch)
179 {
180 if (peek() != ch) {
181 dbg() << "peek() != '" << ch << "'";
182 }
183 PARSE_ASSERT(peek() == ch);
184 PARSE_ASSERT(index < css.length());
185 ++index;
186 return ch;
187 }
188
189 char consume_one()
190 {
191 PARSE_ASSERT(index < css.length());
192 return css[index++];
193 };
194
195 bool consume_whitespace_or_comments()
196 {
197 size_t original_index = index;
198 bool in_comment = false;
199 for (; index < css.length(); ++index) {
200 char ch = peek();
201 if (isspace(ch))
202 continue;
203 if (!in_comment && ch == '/' && peek(1) == '*') {
204 in_comment = true;
205 ++index;
206 continue;
207 }
208 if (in_comment && ch == '*' && peek(1) == '/') {
209 in_comment = false;
210 ++index;
211 continue;
212 }
213 if (in_comment)
214 continue;
215 break;
216 }
217 return original_index != index;
218 }
219
220 bool is_valid_selector_char(char ch) const
221 {
222 return isalnum(ch) || ch == '-' || ch == '_' || ch == '(' || ch == ')' || ch == '@';
223 }
224
225 bool is_combinator(char ch) const
226 {
227 return ch == '~' || ch == '>' || ch == '+';
228 }
229
230 Optional<Selector::SimpleSelector> parse_simple_selector()
231 {
232 if (consume_whitespace_or_comments())
233 return {};
234
235 if (peek() == '{' || peek() == ',' || is_combinator(peek()))
236 return {};
237
238 Selector::SimpleSelector::Type type;
239
240 if (peek() == '*') {
241 type = Selector::SimpleSelector::Type::Universal;
242 consume_one();
243 return Selector::SimpleSelector {
244 type,
245 Selector::SimpleSelector::PseudoClass::None,
246 String(),
247 Selector::SimpleSelector::AttributeMatchType::None,
248 String(),
249 String()
250 };
251 }
252
253 if (peek() == '.') {
254 type = Selector::SimpleSelector::Type::Class;
255 consume_one();
256 } else if (peek() == '#') {
257 type = Selector::SimpleSelector::Type::Id;
258 consume_one();
259 } else if (isalpha(peek())) {
260 type = Selector::SimpleSelector::Type::TagName;
261 } else {
262 type = Selector::SimpleSelector::Type::Universal;
263 }
264
265 if (type != Selector::SimpleSelector::Type::Universal) {
266 while (is_valid_selector_char(peek()))
267 buffer.append(consume_one());
268 PARSE_ASSERT(!buffer.is_null());
269 }
270
271 Selector::SimpleSelector simple_selector {
272 type,
273 Selector::SimpleSelector::PseudoClass::None,
274 String::copy(buffer),
275 Selector::SimpleSelector::AttributeMatchType::None,
276 String(),
277 String()
278 };
279 buffer.clear();
280
281 if (peek() == '[') {
282 Selector::SimpleSelector::AttributeMatchType attribute_match_type = Selector::SimpleSelector::AttributeMatchType::HasAttribute;
283 String attribute_name;
284 String attribute_value;
285 bool in_value = false;
286 consume_specific('[');
287 char expected_end_of_attribute_selector = ']';
288 while (peek() != expected_end_of_attribute_selector) {
289 char ch = consume_one();
290 if (ch == '=') {
291 attribute_match_type = Selector::SimpleSelector::AttributeMatchType::ExactValueMatch;
292 attribute_name = String::copy(buffer);
293 buffer.clear();
294 in_value = true;
295 consume_whitespace_or_comments();
296 if (peek() == '\'') {
297 expected_end_of_attribute_selector = '\'';
298 consume_one();
299 } else if (peek() == '"') {
300 expected_end_of_attribute_selector = '"';
301 consume_one();
302 }
303 continue;
304 }
305 buffer.append(ch);
306 }
307 if (in_value)
308 attribute_value = String::copy(buffer);
309 else
310 attribute_name = String::copy(buffer);
311 buffer.clear();
312 simple_selector.attribute_match_type = attribute_match_type;
313 simple_selector.attribute_name = attribute_name;
314 simple_selector.attribute_value = attribute_value;
315 if (expected_end_of_attribute_selector != ']')
316 consume_specific(expected_end_of_attribute_selector);
317 consume_whitespace_or_comments();
318 consume_specific(']');
319 }
320
321 if (peek() == ':') {
322 // FIXME: Implement pseudo elements.
323 [[maybe_unused]] bool is_pseudo_element = false;
324 consume_one();
325 if (peek() == ':') {
326 is_pseudo_element = true;
327 consume_one();
328 }
329 if (next_is("not")) {
330 buffer.append(consume_one());
331 buffer.append(consume_one());
332 buffer.append(consume_one());
333 buffer.append(consume_specific('('));
334 while (peek() != ')')
335 buffer.append(consume_one());
336 buffer.append(consume_specific(')'));
337 } else {
338 while (is_valid_selector_char(peek()))
339 buffer.append(consume_one());
340 }
341
342 auto pseudo_name = String::copy(buffer);
343 buffer.clear();
344
345 if (pseudo_name == "link")
346 simple_selector.pseudo_class = Selector::SimpleSelector::PseudoClass::Link;
347 else if (pseudo_name == "hover")
348 simple_selector.pseudo_class = Selector::SimpleSelector::PseudoClass::Hover;
349 else if (pseudo_name == "first-child")
350 simple_selector.pseudo_class = Selector::SimpleSelector::PseudoClass::FirstChild;
351 else if (pseudo_name == "last-child")
352 simple_selector.pseudo_class = Selector::SimpleSelector::PseudoClass::LastChild;
353 else if (pseudo_name == "only-child")
354 simple_selector.pseudo_class = Selector::SimpleSelector::PseudoClass::OnlyChild;
355 else if (pseudo_name == "empty")
356 simple_selector.pseudo_class = Selector::SimpleSelector::PseudoClass::Empty;
357 }
358
359 return simple_selector;
360 }
361
362 Optional<Selector::ComplexSelector> parse_complex_selector()
363 {
364 auto relation = Selector::ComplexSelector::Relation::Descendant;
365
366 if (peek() == '{' || peek() == ',')
367 return {};
368
369 if (is_combinator(peek())) {
370 switch (peek()) {
371 case '>':
372 relation = Selector::ComplexSelector::Relation::ImmediateChild;
373 break;
374 case '+':
375 relation = Selector::ComplexSelector::Relation::AdjacentSibling;
376 break;
377 case '~':
378 relation = Selector::ComplexSelector::Relation::GeneralSibling;
379 break;
380 }
381 consume_one();
382 consume_whitespace_or_comments();
383 }
384
385 consume_whitespace_or_comments();
386
387 Vector<Selector::SimpleSelector> simple_selectors;
388 for (;;) {
389 auto component = parse_simple_selector();
390 if (!component.has_value())
391 break;
392 simple_selectors.append(component.value());
393 // If this assert triggers, we're most likely up to no good.
394 PARSE_ASSERT(simple_selectors.size() < 100);
395 }
396
397 return Selector::ComplexSelector { relation, move(simple_selectors) };
398 }
399
400 void parse_selector()
401 {
402 Vector<Selector::ComplexSelector> complex_selectors;
403
404 for (;;) {
405 auto complex_selector = parse_complex_selector();
406 if (complex_selector.has_value())
407 complex_selectors.append(complex_selector.value());
408 consume_whitespace_or_comments();
409 if (peek() == ',' || peek() == '{')
410 break;
411 }
412
413 if (complex_selectors.is_empty())
414 return;
415 complex_selectors.first().relation = Selector::ComplexSelector::Relation::None;
416
417 current_rule.selectors.append(Selector(move(complex_selectors)));
418 };
419
420 void parse_selector_list()
421 {
422 for (;;) {
423 parse_selector();
424 consume_whitespace_or_comments();
425 if (peek() == ',') {
426 consume_one();
427 continue;
428 }
429 if (peek() == '{')
430 break;
431 }
432 }
433
434 bool is_valid_property_name_char(char ch) const
435 {
436 return ch && !isspace(ch) && ch != ':';
437 }
438
439 bool is_valid_property_value_char(char ch) const
440 {
441 return ch && ch != '!' && ch != ';' && ch != '}';
442 }
443
444 struct ValueAndImportant {
445 String value;
446 bool important { false };
447 };
448
449 ValueAndImportant consume_css_value()
450 {
451 buffer.clear();
452
453 int paren_nesting_level = 0;
454 bool important = false;
455
456 for (;;) {
457 char ch = peek();
458 if (ch == '(') {
459 ++paren_nesting_level;
460 buffer.append(consume_one());
461 continue;
462 }
463 if (ch == ')') {
464 PARSE_ASSERT(paren_nesting_level > 0);
465 --paren_nesting_level;
466 buffer.append(consume_one());
467 continue;
468 }
469 if (paren_nesting_level > 0) {
470 buffer.append(consume_one());
471 continue;
472 }
473 if (next_is("!important")) {
474 consume_specific('!');
475 consume_specific('i');
476 consume_specific('m');
477 consume_specific('p');
478 consume_specific('o');
479 consume_specific('r');
480 consume_specific('t');
481 consume_specific('a');
482 consume_specific('n');
483 consume_specific('t');
484 important = true;
485 continue;
486 }
487 if (next_is("/*")) {
488 consume_whitespace_or_comments();
489 continue;
490 }
491 if (!ch)
492 break;
493 if (ch == '}')
494 break;
495 if (ch == ';')
496 break;
497 buffer.append(consume_one());
498 }
499
500 // Remove trailing whitespace.
501 while (!buffer.is_empty() && isspace(buffer.last()))
502 buffer.take_last();
503
504 auto string = String::copy(buffer);
505 buffer.clear();
506
507 return { string, important };
508 }
509
510 Optional<StyleProperty> parse_property()
511 {
512 consume_whitespace_or_comments();
513 if (peek() == ';') {
514 consume_one();
515 return {};
516 }
517 if (peek() == '}')
518 return {};
519 buffer.clear();
520 while (is_valid_property_name_char(peek()))
521 buffer.append(consume_one());
522 auto property_name = String::copy(buffer);
523 buffer.clear();
524 consume_whitespace_or_comments();
525 consume_specific(':');
526 consume_whitespace_or_comments();
527
528 auto [property_value, important] = consume_css_value();
529
530 consume_whitespace_or_comments();
531
532 if (peek() && peek() != '}')
533 consume_specific(';');
534
535 auto property_id = CSS::property_id_from_string(property_name);
536 return StyleProperty { property_id, parse_css_value(property_value), important };
537 }
538
539 void parse_declaration()
540 {
541 for (;;) {
542 auto property = parse_property();
543 if (property.has_value())
544 current_rule.properties.append(property.value());
545 consume_whitespace_or_comments();
546 if (peek() == '}')
547 break;
548 }
549 }
550
551 void parse_rule()
552 {
553 consume_whitespace_or_comments();
554 if (index >= css.length())
555 return;
556
557 // FIXME: We ignore @-rules for now.
558 if (peek() == '@') {
559 while (peek() != '{')
560 consume_one();
561 int level = 0;
562 for (;;) {
563 auto ch = consume_one();
564 if (ch == '{') {
565 ++level;
566 } else if (ch == '}') {
567 --level;
568 if (level == 0)
569 break;
570 }
571 }
572 consume_whitespace_or_comments();
573 return;
574 }
575
576 parse_selector_list();
577 consume_specific('{');
578 parse_declaration();
579 consume_specific('}');
580 rules.append(StyleRule::create(move(current_rule.selectors), StyleDeclaration::create(move(current_rule.properties))));
581 consume_whitespace_or_comments();
582 }
583
584 RefPtr<StyleSheet> parse_sheet()
585 {
586 while (index < css.length()) {
587 parse_rule();
588 }
589
590 return StyleSheet::create(move(rules));
591 }
592
593 RefPtr<StyleDeclaration> parse_standalone_declaration()
594 {
595 consume_whitespace_or_comments();
596 for (;;) {
597 auto property = parse_property();
598 if (property.has_value())
599 current_rule.properties.append(property.value());
600 consume_whitespace_or_comments();
601 if (!peek())
602 break;
603 }
604 return StyleDeclaration::create(move(current_rule.properties));
605 }
606
607private:
608 NonnullRefPtrVector<StyleRule> rules;
609
610 struct CurrentRule {
611 Vector<Selector> selectors;
612 Vector<StyleProperty> properties;
613 };
614
615 CurrentRule current_rule;
616 Vector<char> buffer;
617
618 size_t index = 0;
619
620 StringView css;
621};
622
623RefPtr<StyleSheet> parse_css(const StringView& css)
624{
625 CSSParser parser(css);
626 return parser.parse_sheet();
627}
628
629RefPtr<StyleDeclaration> parse_css_declaration(const StringView& css)
630{
631 CSSParser parser(css);
632 return parser.parse_standalone_declaration();
633}