Serenity Operating System
at master 674 lines 20 kB view raw
1/* 2 * Copyright (c) 2019-2020, Sergey Bugaev <bugaevc@serenityos.org> 3 * Copyright (c) 2021, Peter Elliott <pelliott@serenityos.org> 4 * 5 * SPDX-License-Identifier: BSD-2-Clause 6 */ 7 8#include <AK/ScopeGuard.h> 9#include <AK/StringBuilder.h> 10#include <LibMarkdown/Text.h> 11#include <LibMarkdown/Visitor.h> 12#include <ctype.h> 13#include <string.h> 14 15namespace Markdown { 16 17void Text::EmphasisNode::render_to_html(StringBuilder& builder) const 18{ 19 builder.append((strong) ? "<strong>"sv : "<em>"sv); 20 child->render_to_html(builder); 21 builder.append((strong) ? "</strong>"sv : "</em>"sv); 22} 23 24void Text::EmphasisNode::render_for_terminal(StringBuilder& builder) const 25{ 26 if (strong) { 27 builder.append("\e[1m"sv); 28 child->render_for_terminal(builder); 29 builder.append("\e[22m"sv); 30 } else { 31 builder.append("\e[3m"sv); 32 child->render_for_terminal(builder); 33 builder.append("\e[23m"sv); 34 } 35} 36 37size_t Text::EmphasisNode::terminal_length() const 38{ 39 return child->terminal_length(); 40} 41 42RecursionDecision Text::EmphasisNode::walk(Visitor& visitor) const 43{ 44 RecursionDecision rd = visitor.visit(*this); 45 if (rd != RecursionDecision::Recurse) 46 return rd; 47 48 return child->walk(visitor); 49} 50 51void Text::CodeNode::render_to_html(StringBuilder& builder) const 52{ 53 builder.append("<code>"sv); 54 code->render_to_html(builder); 55 builder.append("</code>"sv); 56} 57 58void Text::CodeNode::render_for_terminal(StringBuilder& builder) const 59{ 60 builder.append("\e[1m"sv); 61 code->render_for_terminal(builder); 62 builder.append("\e[22m"sv); 63} 64 65size_t Text::CodeNode::terminal_length() const 66{ 67 return code->terminal_length(); 68} 69 70RecursionDecision Text::CodeNode::walk(Visitor& visitor) const 71{ 72 RecursionDecision rd = visitor.visit(*this); 73 if (rd != RecursionDecision::Recurse) 74 return rd; 75 76 return code->walk(visitor); 77} 78 79void Text::BreakNode::render_to_html(StringBuilder& builder) const 80{ 81 builder.append("<br />"sv); 82} 83 84void Text::BreakNode::render_for_terminal(StringBuilder&) const 85{ 86} 87 88size_t Text::BreakNode::terminal_length() const 89{ 90 return 0; 91} 92 93RecursionDecision Text::BreakNode::walk(Visitor& visitor) const 94{ 95 RecursionDecision rd = visitor.visit(*this); 96 if (rd != RecursionDecision::Recurse) 97 return rd; 98 // Normalize return value 99 return RecursionDecision::Continue; 100} 101 102void Text::TextNode::render_to_html(StringBuilder& builder) const 103{ 104 builder.append(escape_html_entities(text)); 105} 106 107void Text::TextNode::render_for_terminal(StringBuilder& builder) const 108{ 109 if (collapsible && (text == "\n" || text.is_whitespace())) { 110 builder.append(' '); 111 } else { 112 builder.append(text); 113 } 114} 115 116size_t Text::TextNode::terminal_length() const 117{ 118 if (collapsible && text.is_whitespace()) { 119 return 1; 120 } 121 122 return text.length(); 123} 124 125RecursionDecision Text::TextNode::walk(Visitor& visitor) const 126{ 127 RecursionDecision rd = visitor.visit(*this); 128 if (rd != RecursionDecision::Recurse) 129 return rd; 130 rd = visitor.visit(text); 131 if (rd != RecursionDecision::Recurse) 132 return rd; 133 // Normalize return value 134 return RecursionDecision::Continue; 135} 136 137void Text::LinkNode::render_to_html(StringBuilder& builder) const 138{ 139 if (is_image) { 140 builder.append("<img src=\""sv); 141 builder.append(escape_html_entities(href)); 142 if (has_image_dimensions()) { 143 builder.append("\" style=\""sv); 144 if (image_width.has_value()) 145 builder.appendff("width: {}px;", *image_width); 146 if (image_height.has_value()) 147 builder.appendff("height: {}px;", *image_height); 148 } 149 builder.append("\" alt=\""sv); 150 text->render_to_html(builder); 151 builder.append("\" >"sv); 152 } else { 153 builder.append("<a href=\""sv); 154 builder.append(escape_html_entities(href)); 155 builder.append("\">"sv); 156 text->render_to_html(builder); 157 builder.append("</a>"sv); 158 } 159} 160 161void Text::LinkNode::render_for_terminal(StringBuilder& builder) const 162{ 163 bool is_linked = href.contains("://"sv); 164 if (is_linked) { 165 builder.append("\033[0;34m\e]8;;"sv); 166 builder.append(href); 167 builder.append("\e\\"sv); 168 } 169 170 text->render_for_terminal(builder); 171 172 if (is_linked) { 173 builder.appendff(" <{}>", href); 174 builder.append("\033]8;;\033\\\033[0m"sv); 175 } 176} 177 178size_t Text::LinkNode::terminal_length() const 179{ 180 return text->terminal_length(); 181} 182 183RecursionDecision Text::LinkNode::walk(Visitor& visitor) const 184{ 185 RecursionDecision rd = visitor.visit(*this); 186 if (rd != RecursionDecision::Recurse) 187 return rd; 188 189 // Don't recurse on href. 190 191 return text->walk(visitor); 192} 193 194void Text::MultiNode::render_to_html(StringBuilder& builder) const 195{ 196 for (auto& child : children) { 197 child->render_to_html(builder); 198 } 199} 200 201void Text::MultiNode::render_for_terminal(StringBuilder& builder) const 202{ 203 for (auto& child : children) { 204 child->render_for_terminal(builder); 205 } 206} 207 208size_t Text::MultiNode::terminal_length() const 209{ 210 size_t length = 0; 211 for (auto& child : children) { 212 length += child->terminal_length(); 213 } 214 return length; 215} 216 217RecursionDecision Text::MultiNode::walk(Visitor& visitor) const 218{ 219 RecursionDecision rd = visitor.visit(*this); 220 if (rd != RecursionDecision::Recurse) 221 return rd; 222 223 for (auto const& child : children) { 224 rd = child->walk(visitor); 225 if (rd == RecursionDecision::Break) 226 return rd; 227 } 228 229 return RecursionDecision::Continue; 230} 231 232void Text::StrikeThroughNode::render_to_html(StringBuilder& builder) const 233{ 234 builder.append("<del>"sv); 235 striked_text->render_to_html(builder); 236 builder.append("</del>"sv); 237} 238 239void Text::StrikeThroughNode::render_for_terminal(StringBuilder& builder) const 240{ 241 builder.append("\e[9m"sv); 242 striked_text->render_for_terminal(builder); 243 builder.append("\e[29m"sv); 244} 245 246size_t Text::StrikeThroughNode::terminal_length() const 247{ 248 return striked_text->terminal_length(); 249} 250 251RecursionDecision Text::StrikeThroughNode::walk(Visitor& visitor) const 252{ 253 RecursionDecision rd = visitor.visit(*this); 254 if (rd != RecursionDecision::Recurse) 255 return rd; 256 257 return striked_text->walk(visitor); 258} 259 260size_t Text::terminal_length() const 261{ 262 return m_node->terminal_length(); 263} 264 265DeprecatedString Text::render_to_html() const 266{ 267 StringBuilder builder; 268 m_node->render_to_html(builder); 269 return builder.to_deprecated_string().trim(" \n\t"sv); 270} 271 272DeprecatedString Text::render_for_terminal() const 273{ 274 StringBuilder builder; 275 m_node->render_for_terminal(builder); 276 return builder.to_deprecated_string().trim(" \n\t"sv); 277} 278 279RecursionDecision Text::walk(Visitor& visitor) const 280{ 281 RecursionDecision rd = visitor.visit(*this); 282 if (rd != RecursionDecision::Recurse) 283 return rd; 284 285 return m_node->walk(visitor); 286} 287 288Text Text::parse(StringView str) 289{ 290 Text text; 291 auto const tokens = tokenize(str); 292 auto iterator = tokens.begin(); 293 text.m_node = parse_sequence(iterator, false); 294 return text; 295} 296 297static bool flanking(StringView str, size_t start, size_t end, int dir) 298{ 299 ssize_t next = ((dir > 0) ? end : start) + dir; 300 if (next < 0 || next >= (ssize_t)str.length()) 301 return false; 302 303 if (isspace(str[next])) 304 return false; 305 306 if (!ispunct(str[next])) 307 return true; 308 309 ssize_t prev = ((dir > 0) ? start : end) - dir; 310 if (prev < 0 || prev >= (ssize_t)str.length()) 311 return true; 312 313 return isspace(str[prev]) || ispunct(str[prev]); 314} 315 316Vector<Text::Token> Text::tokenize(StringView str) 317{ 318 Vector<Token> tokens; 319 StringBuilder current_token; 320 321 auto flush_run = [&](bool left_flanking, bool right_flanking, bool punct_before, bool punct_after, bool is_run) { 322 if (current_token.is_empty()) 323 return; 324 325 tokens.append({ 326 current_token.to_deprecated_string(), 327 left_flanking, 328 right_flanking, 329 punct_before, 330 punct_after, 331 is_run, 332 }); 333 current_token.clear(); 334 }; 335 336 auto flush_token = [&]() { 337 flush_run(false, false, false, false, false); 338 }; 339 340 bool in_space = false; 341 342 for (size_t offset = 0; offset < str.length(); ++offset) { 343 auto has = [&](StringView seq) { 344 if (offset + seq.length() > str.length()) 345 return false; 346 347 return str.substring_view(offset, seq.length()) == seq; 348 }; 349 350 auto expect = [&](StringView seq) { 351 VERIFY(has(seq)); 352 flush_token(); 353 current_token.append(seq); 354 flush_token(); 355 offset += seq.length() - 1; 356 }; 357 358 char ch = str[offset]; 359 if (ch != ' ' && in_space) { 360 flush_token(); 361 in_space = false; 362 } 363 364 if (ch == '\\' && offset + 1 < str.length() && ispunct(str[offset + 1])) { 365 current_token.append(str[offset + 1]); 366 ++offset; 367 } else if (ch == '*' || ch == '_' || ch == '`' || ch == '~') { 368 flush_token(); 369 370 char delim = ch; 371 size_t run_offset; 372 for (run_offset = offset; run_offset < str.length() && str[run_offset] == delim; ++run_offset) { 373 current_token.append(str[run_offset]); 374 } 375 376 flush_run(flanking(str, offset, run_offset - 1, +1), 377 flanking(str, offset, run_offset - 1, -1), 378 offset > 0 && ispunct(str[offset - 1]), 379 run_offset < str.length() && ispunct(str[run_offset]), 380 true); 381 offset = run_offset - 1; 382 383 } else if (ch == ' ') { 384 if (!in_space) { 385 flush_token(); 386 in_space = true; 387 } 388 current_token.append(ch); 389 } else if (has("\n"sv)) { 390 expect("\n"sv); 391 } else if (has("["sv)) { 392 expect("["sv); 393 } else if (has("!["sv)) { 394 expect("!["sv); 395 } else if (has("]("sv)) { 396 expect("]("sv); 397 } else if (has(")"sv)) { 398 expect(")"sv); 399 } else { 400 current_token.append(ch); 401 } 402 } 403 flush_token(); 404 return tokens; 405} 406 407NonnullOwnPtr<Text::MultiNode> Text::parse_sequence(Vector<Token>::ConstIterator& tokens, bool in_link) 408{ 409 auto node = make<MultiNode>(); 410 411 for (; !tokens.is_end(); ++tokens) { 412 if (tokens->is_space()) { 413 node->children.append(parse_break(tokens)); 414 } else if (*tokens == "\n"sv) { 415 node->children.append(parse_newline(tokens)); 416 } else if (tokens->is_run) { 417 switch (tokens->run_char()) { 418 case '*': 419 case '_': 420 node->children.append(parse_emph(tokens, in_link)); 421 break; 422 case '`': 423 node->children.append(parse_code(tokens)); 424 break; 425 case '~': 426 node->children.append(parse_strike_through(tokens)); 427 break; 428 } 429 } else if (*tokens == "["sv || *tokens == "!["sv) { 430 node->children.append(parse_link(tokens)); 431 } else if (in_link && *tokens == "]("sv) { 432 return node; 433 } else { 434 node->children.append(make<TextNode>(tokens->data)); 435 } 436 437 if (in_link && !tokens.is_end() && *tokens == "]("sv) 438 return node; 439 440 if (tokens.is_end()) 441 break; 442 } 443 return node; 444} 445 446NonnullOwnPtr<Text::Node> Text::parse_break(Vector<Token>::ConstIterator& tokens) 447{ 448 auto next_tok = tokens + 1; 449 if (next_tok.is_end() || *next_tok != "\n"sv) 450 return make<TextNode>(tokens->data); 451 452 if (tokens->data.length() >= 2) 453 return make<BreakNode>(); 454 455 return make<MultiNode>(); 456} 457 458NonnullOwnPtr<Text::Node> Text::parse_newline(Vector<Token>::ConstIterator& tokens) 459{ 460 auto node = make<TextNode>(tokens->data); 461 auto next_tok = tokens + 1; 462 if (!next_tok.is_end() && next_tok->is_space()) 463 // Skip whitespace after newline. 464 ++tokens; 465 466 return node; 467} 468 469bool Text::can_open(Token const& opening) 470{ 471 return (opening.run_char() == '~' && opening.left_flanking) || (opening.run_char() == '*' && opening.left_flanking) || (opening.run_char() == '_' && opening.left_flanking && (!opening.right_flanking || opening.punct_before)); 472} 473 474bool Text::can_close_for(Token const& opening, Text::Token const& closing) 475{ 476 if (opening.run_char() != closing.run_char()) 477 return false; 478 479 if (opening.run_length() != closing.run_length()) 480 return false; 481 482 return (opening.run_char() == '~' && closing.right_flanking) || (opening.run_char() == '*' && closing.right_flanking) || (opening.run_char() == '_' && closing.right_flanking && (!closing.left_flanking || closing.punct_after)); 483} 484 485NonnullOwnPtr<Text::Node> Text::parse_emph(Vector<Token>::ConstIterator& tokens, bool in_link) 486{ 487 auto opening = *tokens; 488 489 // Check that the opening delimiter run is properly flanking. 490 if (!can_open(opening)) 491 return make<TextNode>(opening.data); 492 493 auto child = make<MultiNode>(); 494 for (++tokens; !tokens.is_end(); ++tokens) { 495 if (tokens->is_space()) { 496 child->children.append(parse_break(tokens)); 497 } else if (*tokens == "\n"sv) { 498 child->children.append(parse_newline(tokens)); 499 } else if (tokens->is_run) { 500 if (can_close_for(opening, *tokens)) { 501 return make<EmphasisNode>(opening.run_length() >= 2, move(child)); 502 } 503 504 switch (tokens->run_char()) { 505 case '*': 506 case '_': 507 child->children.append(parse_emph(tokens, in_link)); 508 break; 509 case '`': 510 child->children.append(parse_code(tokens)); 511 break; 512 case '~': 513 child->children.append(parse_strike_through(tokens)); 514 break; 515 } 516 } else if (*tokens == "["sv || *tokens == "!["sv) { 517 child->children.append(parse_link(tokens)); 518 } else if (in_link && *tokens == "]("sv) { 519 child->children.prepend(make<TextNode>(opening.data)); 520 return child; 521 } else { 522 child->children.append(make<TextNode>(tokens->data)); 523 } 524 525 if (in_link && !tokens.is_end() && *tokens == "]("sv) { 526 child->children.prepend(make<TextNode>(opening.data)); 527 return child; 528 } 529 530 if (tokens.is_end()) 531 break; 532 } 533 child->children.prepend(make<TextNode>(opening.data)); 534 return child; 535} 536 537NonnullOwnPtr<Text::Node> Text::parse_code(Vector<Token>::ConstIterator& tokens) 538{ 539 auto opening = *tokens; 540 541 auto is_closing = [&](Token const& token) { 542 return token.is_run && token.run_char() == '`' && token.run_length() == opening.run_length(); 543 }; 544 545 bool is_all_whitespace = true; 546 auto code = make<MultiNode>(); 547 for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) { 548 if (is_closing(*iterator)) { 549 tokens = iterator; 550 551 // Strip first and last space, when appropriate. 552 if (!is_all_whitespace) { 553 auto& first = dynamic_cast<TextNode&>(*code->children.first()); 554 auto& last = dynamic_cast<TextNode&>(*code->children.last()); 555 if (first.text.starts_with(' ') && last.text.ends_with(' ')) { 556 first.text = first.text.substring(1); 557 last.text = last.text.substring(0, last.text.length() - 1); 558 } 559 } 560 561 return make<CodeNode>(move(code)); 562 } 563 564 is_all_whitespace = is_all_whitespace && iterator->data.is_whitespace(); 565 code->children.append(make<TextNode>((*iterator == "\n"sv) ? " " : iterator->data, false)); 566 } 567 568 return make<TextNode>(opening.data); 569} 570 571NonnullOwnPtr<Text::Node> Text::parse_link(Vector<Token>::ConstIterator& tokens) 572{ 573 auto opening = *tokens++; 574 bool is_image = opening == "!["sv; 575 576 auto link_text = parse_sequence(tokens, true); 577 578 if (tokens.is_end() || *tokens != "]("sv) { 579 link_text->children.prepend(make<TextNode>(opening.data)); 580 return link_text; 581 } 582 auto separator = *tokens; 583 VERIFY(separator == "]("sv); 584 585 Optional<int> image_width; 586 Optional<int> image_height; 587 588 auto parse_image_dimensions = [&](StringView dimensions) -> bool { 589 if (!dimensions.starts_with('=')) 590 return false; 591 592 ArmedScopeGuard clear_image_dimensions = [&] { 593 image_width = {}; 594 image_height = {}; 595 }; 596 597 auto dimension_seperator = dimensions.find('x', 1); 598 if (!dimension_seperator.has_value()) 599 return false; 600 601 auto width_string = dimensions.substring_view(1, *dimension_seperator - 1); 602 if (!width_string.is_empty()) { 603 auto width = width_string.to_int(); 604 if (!width.has_value()) 605 return false; 606 image_width = width; 607 } 608 609 auto height_start = *dimension_seperator + 1; 610 if (height_start < dimensions.length()) { 611 auto height_string = dimensions.substring_view(height_start); 612 auto height = height_string.to_int(); 613 if (!height.has_value()) 614 return false; 615 image_height = height; 616 } 617 618 clear_image_dimensions.disarm(); 619 return true; 620 }; 621 622 StringBuilder address; 623 for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) { 624 // FIXME: What to do if there's multiple dimension tokens? 625 if (is_image && !address.is_empty() && parse_image_dimensions(iterator->data)) 626 continue; 627 628 if (*iterator == ")"sv) { 629 tokens = iterator; 630 return make<LinkNode>(is_image, move(link_text), address.to_deprecated_string().trim_whitespace(), image_width, image_height); 631 } 632 633 address.append(iterator->data); 634 } 635 636 link_text->children.prepend(make<TextNode>(opening.data)); 637 link_text->children.append(make<TextNode>(separator.data)); 638 return link_text; 639} 640 641NonnullOwnPtr<Text::Node> Text::parse_strike_through(Vector<Token>::ConstIterator& tokens) 642{ 643 auto opening = *tokens; 644 645 auto is_closing = [&](Token const& token) { 646 return token.is_run && token.run_char() == '~' && token.run_length() == opening.run_length(); 647 }; 648 649 bool is_all_whitespace = true; 650 auto striked_text = make<MultiNode>(); 651 for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) { 652 if (is_closing(*iterator)) { 653 tokens = iterator; 654 655 if (!is_all_whitespace) { 656 auto& first = dynamic_cast<TextNode&>(*striked_text->children.first()); 657 auto& last = dynamic_cast<TextNode&>(*striked_text->children.last()); 658 if (first.text.starts_with(' ') && last.text.ends_with(' ')) { 659 first.text = first.text.substring(1); 660 last.text = last.text.substring(0, last.text.length() - 1); 661 } 662 } 663 664 return make<StrikeThroughNode>(move(striked_text)); 665 } 666 667 is_all_whitespace = is_all_whitespace && iterator->data.is_whitespace(); 668 striked_text->children.append(make<TextNode>((*iterator == "\n"sv) ? " " : iterator->data, false)); 669 } 670 671 return make<TextNode>(opening.data); 672} 673 674}