Serenity Operating System
1/*
2 * Copyright (c) 2019-2020, Sergey Bugaev <bugaevc@serenityos.org>
3 * Copyright (c) 2021, Peter Elliott <pelliott@serenityos.org>
4 *
5 * SPDX-License-Identifier: BSD-2-Clause
6 */
7
8#include <AK/ScopeGuard.h>
9#include <AK/StringBuilder.h>
10#include <LibMarkdown/Text.h>
11#include <LibMarkdown/Visitor.h>
12#include <ctype.h>
13#include <string.h>
14
15namespace Markdown {
16
17void Text::EmphasisNode::render_to_html(StringBuilder& builder) const
18{
19 builder.append((strong) ? "<strong>"sv : "<em>"sv);
20 child->render_to_html(builder);
21 builder.append((strong) ? "</strong>"sv : "</em>"sv);
22}
23
24void Text::EmphasisNode::render_for_terminal(StringBuilder& builder) const
25{
26 if (strong) {
27 builder.append("\e[1m"sv);
28 child->render_for_terminal(builder);
29 builder.append("\e[22m"sv);
30 } else {
31 builder.append("\e[3m"sv);
32 child->render_for_terminal(builder);
33 builder.append("\e[23m"sv);
34 }
35}
36
37size_t Text::EmphasisNode::terminal_length() const
38{
39 return child->terminal_length();
40}
41
42RecursionDecision Text::EmphasisNode::walk(Visitor& visitor) const
43{
44 RecursionDecision rd = visitor.visit(*this);
45 if (rd != RecursionDecision::Recurse)
46 return rd;
47
48 return child->walk(visitor);
49}
50
51void Text::CodeNode::render_to_html(StringBuilder& builder) const
52{
53 builder.append("<code>"sv);
54 code->render_to_html(builder);
55 builder.append("</code>"sv);
56}
57
58void Text::CodeNode::render_for_terminal(StringBuilder& builder) const
59{
60 builder.append("\e[1m"sv);
61 code->render_for_terminal(builder);
62 builder.append("\e[22m"sv);
63}
64
65size_t Text::CodeNode::terminal_length() const
66{
67 return code->terminal_length();
68}
69
70RecursionDecision Text::CodeNode::walk(Visitor& visitor) const
71{
72 RecursionDecision rd = visitor.visit(*this);
73 if (rd != RecursionDecision::Recurse)
74 return rd;
75
76 return code->walk(visitor);
77}
78
79void Text::BreakNode::render_to_html(StringBuilder& builder) const
80{
81 builder.append("<br />"sv);
82}
83
84void Text::BreakNode::render_for_terminal(StringBuilder&) const
85{
86}
87
88size_t Text::BreakNode::terminal_length() const
89{
90 return 0;
91}
92
93RecursionDecision Text::BreakNode::walk(Visitor& visitor) const
94{
95 RecursionDecision rd = visitor.visit(*this);
96 if (rd != RecursionDecision::Recurse)
97 return rd;
98 // Normalize return value
99 return RecursionDecision::Continue;
100}
101
102void Text::TextNode::render_to_html(StringBuilder& builder) const
103{
104 builder.append(escape_html_entities(text));
105}
106
107void Text::TextNode::render_for_terminal(StringBuilder& builder) const
108{
109 if (collapsible && (text == "\n" || text.is_whitespace())) {
110 builder.append(' ');
111 } else {
112 builder.append(text);
113 }
114}
115
116size_t Text::TextNode::terminal_length() const
117{
118 if (collapsible && text.is_whitespace()) {
119 return 1;
120 }
121
122 return text.length();
123}
124
125RecursionDecision Text::TextNode::walk(Visitor& visitor) const
126{
127 RecursionDecision rd = visitor.visit(*this);
128 if (rd != RecursionDecision::Recurse)
129 return rd;
130 rd = visitor.visit(text);
131 if (rd != RecursionDecision::Recurse)
132 return rd;
133 // Normalize return value
134 return RecursionDecision::Continue;
135}
136
137void Text::LinkNode::render_to_html(StringBuilder& builder) const
138{
139 if (is_image) {
140 builder.append("<img src=\""sv);
141 builder.append(escape_html_entities(href));
142 if (has_image_dimensions()) {
143 builder.append("\" style=\""sv);
144 if (image_width.has_value())
145 builder.appendff("width: {}px;", *image_width);
146 if (image_height.has_value())
147 builder.appendff("height: {}px;", *image_height);
148 }
149 builder.append("\" alt=\""sv);
150 text->render_to_html(builder);
151 builder.append("\" >"sv);
152 } else {
153 builder.append("<a href=\""sv);
154 builder.append(escape_html_entities(href));
155 builder.append("\">"sv);
156 text->render_to_html(builder);
157 builder.append("</a>"sv);
158 }
159}
160
161void Text::LinkNode::render_for_terminal(StringBuilder& builder) const
162{
163 bool is_linked = href.contains("://"sv);
164 if (is_linked) {
165 builder.append("\033[0;34m\e]8;;"sv);
166 builder.append(href);
167 builder.append("\e\\"sv);
168 }
169
170 text->render_for_terminal(builder);
171
172 if (is_linked) {
173 builder.appendff(" <{}>", href);
174 builder.append("\033]8;;\033\\\033[0m"sv);
175 }
176}
177
178size_t Text::LinkNode::terminal_length() const
179{
180 return text->terminal_length();
181}
182
183RecursionDecision Text::LinkNode::walk(Visitor& visitor) const
184{
185 RecursionDecision rd = visitor.visit(*this);
186 if (rd != RecursionDecision::Recurse)
187 return rd;
188
189 // Don't recurse on href.
190
191 return text->walk(visitor);
192}
193
194void Text::MultiNode::render_to_html(StringBuilder& builder) const
195{
196 for (auto& child : children) {
197 child->render_to_html(builder);
198 }
199}
200
201void Text::MultiNode::render_for_terminal(StringBuilder& builder) const
202{
203 for (auto& child : children) {
204 child->render_for_terminal(builder);
205 }
206}
207
208size_t Text::MultiNode::terminal_length() const
209{
210 size_t length = 0;
211 for (auto& child : children) {
212 length += child->terminal_length();
213 }
214 return length;
215}
216
217RecursionDecision Text::MultiNode::walk(Visitor& visitor) const
218{
219 RecursionDecision rd = visitor.visit(*this);
220 if (rd != RecursionDecision::Recurse)
221 return rd;
222
223 for (auto const& child : children) {
224 rd = child->walk(visitor);
225 if (rd == RecursionDecision::Break)
226 return rd;
227 }
228
229 return RecursionDecision::Continue;
230}
231
232void Text::StrikeThroughNode::render_to_html(StringBuilder& builder) const
233{
234 builder.append("<del>"sv);
235 striked_text->render_to_html(builder);
236 builder.append("</del>"sv);
237}
238
239void Text::StrikeThroughNode::render_for_terminal(StringBuilder& builder) const
240{
241 builder.append("\e[9m"sv);
242 striked_text->render_for_terminal(builder);
243 builder.append("\e[29m"sv);
244}
245
246size_t Text::StrikeThroughNode::terminal_length() const
247{
248 return striked_text->terminal_length();
249}
250
251RecursionDecision Text::StrikeThroughNode::walk(Visitor& visitor) const
252{
253 RecursionDecision rd = visitor.visit(*this);
254 if (rd != RecursionDecision::Recurse)
255 return rd;
256
257 return striked_text->walk(visitor);
258}
259
260size_t Text::terminal_length() const
261{
262 return m_node->terminal_length();
263}
264
265DeprecatedString Text::render_to_html() const
266{
267 StringBuilder builder;
268 m_node->render_to_html(builder);
269 return builder.to_deprecated_string().trim(" \n\t"sv);
270}
271
272DeprecatedString Text::render_for_terminal() const
273{
274 StringBuilder builder;
275 m_node->render_for_terminal(builder);
276 return builder.to_deprecated_string().trim(" \n\t"sv);
277}
278
279RecursionDecision Text::walk(Visitor& visitor) const
280{
281 RecursionDecision rd = visitor.visit(*this);
282 if (rd != RecursionDecision::Recurse)
283 return rd;
284
285 return m_node->walk(visitor);
286}
287
288Text Text::parse(StringView str)
289{
290 Text text;
291 auto const tokens = tokenize(str);
292 auto iterator = tokens.begin();
293 text.m_node = parse_sequence(iterator, false);
294 return text;
295}
296
297static bool flanking(StringView str, size_t start, size_t end, int dir)
298{
299 ssize_t next = ((dir > 0) ? end : start) + dir;
300 if (next < 0 || next >= (ssize_t)str.length())
301 return false;
302
303 if (isspace(str[next]))
304 return false;
305
306 if (!ispunct(str[next]))
307 return true;
308
309 ssize_t prev = ((dir > 0) ? start : end) - dir;
310 if (prev < 0 || prev >= (ssize_t)str.length())
311 return true;
312
313 return isspace(str[prev]) || ispunct(str[prev]);
314}
315
316Vector<Text::Token> Text::tokenize(StringView str)
317{
318 Vector<Token> tokens;
319 StringBuilder current_token;
320
321 auto flush_run = [&](bool left_flanking, bool right_flanking, bool punct_before, bool punct_after, bool is_run) {
322 if (current_token.is_empty())
323 return;
324
325 tokens.append({
326 current_token.to_deprecated_string(),
327 left_flanking,
328 right_flanking,
329 punct_before,
330 punct_after,
331 is_run,
332 });
333 current_token.clear();
334 };
335
336 auto flush_token = [&]() {
337 flush_run(false, false, false, false, false);
338 };
339
340 bool in_space = false;
341
342 for (size_t offset = 0; offset < str.length(); ++offset) {
343 auto has = [&](StringView seq) {
344 if (offset + seq.length() > str.length())
345 return false;
346
347 return str.substring_view(offset, seq.length()) == seq;
348 };
349
350 auto expect = [&](StringView seq) {
351 VERIFY(has(seq));
352 flush_token();
353 current_token.append(seq);
354 flush_token();
355 offset += seq.length() - 1;
356 };
357
358 char ch = str[offset];
359 if (ch != ' ' && in_space) {
360 flush_token();
361 in_space = false;
362 }
363
364 if (ch == '\\' && offset + 1 < str.length() && ispunct(str[offset + 1])) {
365 current_token.append(str[offset + 1]);
366 ++offset;
367 } else if (ch == '*' || ch == '_' || ch == '`' || ch == '~') {
368 flush_token();
369
370 char delim = ch;
371 size_t run_offset;
372 for (run_offset = offset; run_offset < str.length() && str[run_offset] == delim; ++run_offset) {
373 current_token.append(str[run_offset]);
374 }
375
376 flush_run(flanking(str, offset, run_offset - 1, +1),
377 flanking(str, offset, run_offset - 1, -1),
378 offset > 0 && ispunct(str[offset - 1]),
379 run_offset < str.length() && ispunct(str[run_offset]),
380 true);
381 offset = run_offset - 1;
382
383 } else if (ch == ' ') {
384 if (!in_space) {
385 flush_token();
386 in_space = true;
387 }
388 current_token.append(ch);
389 } else if (has("\n"sv)) {
390 expect("\n"sv);
391 } else if (has("["sv)) {
392 expect("["sv);
393 } else if (has(") {
396 expect("]("sv);
397 } else if (has(")"sv)) {
398 expect(")"sv);
399 } else {
400 current_token.append(ch);
401 }
402 }
403 flush_token();
404 return tokens;
405}
406
407NonnullOwnPtr<Text::MultiNode> Text::parse_sequence(Vector<Token>::ConstIterator& tokens, bool in_link)
408{
409 auto node = make<MultiNode>();
410
411 for (; !tokens.is_end(); ++tokens) {
412 if (tokens->is_space()) {
413 node->children.append(parse_break(tokens));
414 } else if (*tokens == "\n"sv) {
415 node->children.append(parse_newline(tokens));
416 } else if (tokens->is_run) {
417 switch (tokens->run_char()) {
418 case '*':
419 case '_':
420 node->children.append(parse_emph(tokens, in_link));
421 break;
422 case '`':
423 node->children.append(parse_code(tokens));
424 break;
425 case '~':
426 node->children.append(parse_strike_through(tokens));
427 break;
428 }
429 } else if (*tokens == "["sv || *tokens == " {
432 return node;
433 } else {
434 node->children.append(make<TextNode>(tokens->data));
435 }
436
437 if (in_link && !tokens.is_end() && *tokens == "]("sv)
438 return node;
439
440 if (tokens.is_end())
441 break;
442 }
443 return node;
444}
445
446NonnullOwnPtr<Text::Node> Text::parse_break(Vector<Token>::ConstIterator& tokens)
447{
448 auto next_tok = tokens + 1;
449 if (next_tok.is_end() || *next_tok != "\n"sv)
450 return make<TextNode>(tokens->data);
451
452 if (tokens->data.length() >= 2)
453 return make<BreakNode>();
454
455 return make<MultiNode>();
456}
457
458NonnullOwnPtr<Text::Node> Text::parse_newline(Vector<Token>::ConstIterator& tokens)
459{
460 auto node = make<TextNode>(tokens->data);
461 auto next_tok = tokens + 1;
462 if (!next_tok.is_end() && next_tok->is_space())
463 // Skip whitespace after newline.
464 ++tokens;
465
466 return node;
467}
468
469bool Text::can_open(Token const& opening)
470{
471 return (opening.run_char() == '~' && opening.left_flanking) || (opening.run_char() == '*' && opening.left_flanking) || (opening.run_char() == '_' && opening.left_flanking && (!opening.right_flanking || opening.punct_before));
472}
473
474bool Text::can_close_for(Token const& opening, Text::Token const& closing)
475{
476 if (opening.run_char() != closing.run_char())
477 return false;
478
479 if (opening.run_length() != closing.run_length())
480 return false;
481
482 return (opening.run_char() == '~' && closing.right_flanking) || (opening.run_char() == '*' && closing.right_flanking) || (opening.run_char() == '_' && closing.right_flanking && (!closing.left_flanking || closing.punct_after));
483}
484
485NonnullOwnPtr<Text::Node> Text::parse_emph(Vector<Token>::ConstIterator& tokens, bool in_link)
486{
487 auto opening = *tokens;
488
489 // Check that the opening delimiter run is properly flanking.
490 if (!can_open(opening))
491 return make<TextNode>(opening.data);
492
493 auto child = make<MultiNode>();
494 for (++tokens; !tokens.is_end(); ++tokens) {
495 if (tokens->is_space()) {
496 child->children.append(parse_break(tokens));
497 } else if (*tokens == "\n"sv) {
498 child->children.append(parse_newline(tokens));
499 } else if (tokens->is_run) {
500 if (can_close_for(opening, *tokens)) {
501 return make<EmphasisNode>(opening.run_length() >= 2, move(child));
502 }
503
504 switch (tokens->run_char()) {
505 case '*':
506 case '_':
507 child->children.append(parse_emph(tokens, in_link));
508 break;
509 case '`':
510 child->children.append(parse_code(tokens));
511 break;
512 case '~':
513 child->children.append(parse_strike_through(tokens));
514 break;
515 }
516 } else if (*tokens == "["sv || *tokens == " {
519 child->children.prepend(make<TextNode>(opening.data));
520 return child;
521 } else {
522 child->children.append(make<TextNode>(tokens->data));
523 }
524
525 if (in_link && !tokens.is_end() && *tokens == "]("sv) {
526 child->children.prepend(make<TextNode>(opening.data));
527 return child;
528 }
529
530 if (tokens.is_end())
531 break;
532 }
533 child->children.prepend(make<TextNode>(opening.data));
534 return child;
535}
536
537NonnullOwnPtr<Text::Node> Text::parse_code(Vector<Token>::ConstIterator& tokens)
538{
539 auto opening = *tokens;
540
541 auto is_closing = [&](Token const& token) {
542 return token.is_run && token.run_char() == '`' && token.run_length() == opening.run_length();
543 };
544
545 bool is_all_whitespace = true;
546 auto code = make<MultiNode>();
547 for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) {
548 if (is_closing(*iterator)) {
549 tokens = iterator;
550
551 // Strip first and last space, when appropriate.
552 if (!is_all_whitespace) {
553 auto& first = dynamic_cast<TextNode&>(*code->children.first());
554 auto& last = dynamic_cast<TextNode&>(*code->children.last());
555 if (first.text.starts_with(' ') && last.text.ends_with(' ')) {
556 first.text = first.text.substring(1);
557 last.text = last.text.substring(0, last.text.length() - 1);
558 }
559 }
560
561 return make<CodeNode>(move(code));
562 }
563
564 is_all_whitespace = is_all_whitespace && iterator->data.is_whitespace();
565 code->children.append(make<TextNode>((*iterator == "\n"sv) ? " " : iterator->data, false));
566 }
567
568 return make<TextNode>(opening.data);
569}
570
571NonnullOwnPtr<Text::Node> Text::parse_link(Vector<Token>::ConstIterator& tokens)
572{
573 auto opening = *tokens++;
574 bool is_image = opening == " {
579 link_text->children.prepend(make<TextNode>(opening.data));
580 return link_text;
581 }
582 auto separator = *tokens;
583 VERIFY(separator == "]("sv);
584
585 Optional<int> image_width;
586 Optional<int> image_height;
587
588 auto parse_image_dimensions = [&](StringView dimensions) -> bool {
589 if (!dimensions.starts_with('='))
590 return false;
591
592 ArmedScopeGuard clear_image_dimensions = [&] {
593 image_width = {};
594 image_height = {};
595 };
596
597 auto dimension_seperator = dimensions.find('x', 1);
598 if (!dimension_seperator.has_value())
599 return false;
600
601 auto width_string = dimensions.substring_view(1, *dimension_seperator - 1);
602 if (!width_string.is_empty()) {
603 auto width = width_string.to_int();
604 if (!width.has_value())
605 return false;
606 image_width = width;
607 }
608
609 auto height_start = *dimension_seperator + 1;
610 if (height_start < dimensions.length()) {
611 auto height_string = dimensions.substring_view(height_start);
612 auto height = height_string.to_int();
613 if (!height.has_value())
614 return false;
615 image_height = height;
616 }
617
618 clear_image_dimensions.disarm();
619 return true;
620 };
621
622 StringBuilder address;
623 for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) {
624 // FIXME: What to do if there's multiple dimension tokens?
625 if (is_image && !address.is_empty() && parse_image_dimensions(iterator->data))
626 continue;
627
628 if (*iterator == ")"sv) {
629 tokens = iterator;
630 return make<LinkNode>(is_image, move(link_text), address.to_deprecated_string().trim_whitespace(), image_width, image_height);
631 }
632
633 address.append(iterator->data);
634 }
635
636 link_text->children.prepend(make<TextNode>(opening.data));
637 link_text->children.append(make<TextNode>(separator.data));
638 return link_text;
639}
640
641NonnullOwnPtr<Text::Node> Text::parse_strike_through(Vector<Token>::ConstIterator& tokens)
642{
643 auto opening = *tokens;
644
645 auto is_closing = [&](Token const& token) {
646 return token.is_run && token.run_char() == '~' && token.run_length() == opening.run_length();
647 };
648
649 bool is_all_whitespace = true;
650 auto striked_text = make<MultiNode>();
651 for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) {
652 if (is_closing(*iterator)) {
653 tokens = iterator;
654
655 if (!is_all_whitespace) {
656 auto& first = dynamic_cast<TextNode&>(*striked_text->children.first());
657 auto& last = dynamic_cast<TextNode&>(*striked_text->children.last());
658 if (first.text.starts_with(' ') && last.text.ends_with(' ')) {
659 first.text = first.text.substring(1);
660 last.text = last.text.substring(0, last.text.length() - 1);
661 }
662 }
663
664 return make<StrikeThroughNode>(move(striked_text));
665 }
666
667 is_all_whitespace = is_all_whitespace && iterator->data.is_whitespace();
668 striked_text->children.append(make<TextNode>((*iterator == "\n"sv) ? " " : iterator->data, false));
669 }
670
671 return make<TextNode>(opening.data);
672}
673
674}