loading up the forgejo repo on tangled to test page performance
at forgejo 1324 lines 38 kB view raw
// Copyright 2017 The Gitea Authors. All rights reserved.
// Copyright 2025 The Forgejo Authors.
// SPDX-License-Identifier: MIT

package markup

import (
	"bytes"
	"io"
	"net/url"
	"path"
	"path/filepath"
	"regexp"
	"strings"
	"sync"

	"forgejo.org/modules/base"
	"forgejo.org/modules/emoji"
	"forgejo.org/modules/git"
	"forgejo.org/modules/log"
	"forgejo.org/modules/markup/common"
	"forgejo.org/modules/references"
	"forgejo.org/modules/regexplru"
	"forgejo.org/modules/setting"
	"forgejo.org/modules/templates/vars"
	"forgejo.org/modules/translation"
	"forgejo.org/modules/util"

	"golang.org/x/net/html"
	"golang.org/x/net/html/atom"
	"mvdan.cc/xurls/v2"
)

// Issue name styles
const (
	IssueNameStyleNumeric      = "numeric"
	IssueNameStyleAlphanumeric = "alphanumeric"
	IssueNameStyleRegexp       = "regexp"
)

var (
	// NOTE: None of the regex matching below performs any extra validation.
	// Thus a link is produced even if the linked entity does not exist.
	// While fast, this is also incorrect and leads to false positives.
	// TODO: fix invalid linking issue

	// valid chars in encoded path and parameter: [-+~_%.a-zA-Z0-9/]

	// hashCurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae
	// Although SHA1 hashes are 40 chars long, SHA256 are 64, the regex matches the hash from 7 to 64 chars in length
	// so that abbreviated hash links can be used as well. This matches git and GitHub usability.
	hashCurrentPattern = regexp.MustCompile(`(?:^|\s)[^\w\d]{0,2}([0-9a-f]{7,64})[^\w\d]{0,2}(?:\s|$)`)

	// shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax
	shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)

	// anyHashPattern splits url containing SHA into parts
	anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(/[-+~_%.a-zA-Z0-9/]+)?(\?[-+~_%\.a-zA-Z0-9=&]+)?(#[-+~_%.a-zA-Z0-9]+)?`)

	// comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash"
	comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`)

	// validLinksPattern matches a leading URL scheme followed by "://",
	// e.g. "https://"; it is what IsLink and IsLinkStr test against.
	validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)

	// While this email regex is definitely not perfect and I'm sure you can come up
	// with edge cases, it is still accepted by the CommonMark specification, as
	// well as the HTML5 spec:
	//   http://spec.commonmark.org/0.28/#email-address
	//   https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
	emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|;|,|\\?|!|\\.(\\s|$))")

	// blackfriday extensions create IDs like fn:user-content-footnote
	blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`)

	// EmojiShortCodeRegex find emoji by alias like :smile:
	EmojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`)

	// InlineCodeBlockRegex matches a run of non-backtick characters enclosed
	// in a pair of single backticks, e.g. `code`.
	InlineCodeBlockRegex = regexp.MustCompile("`[^`]+`")
)

// CSS class for action keywords (e.g. "closes: #1")
const keywordClass = "issue-keyword"

// IsLink reports whether link fits valid format.
85func IsLink(link []byte) bool { 86 return validLinksPattern.Match(link) 87} 88 89func IsLinkStr(link string) bool { 90 return validLinksPattern.MatchString(link) 91} 92 93// regexp for full links to issues/pulls 94var issueFullPattern *regexp.Regexp 95 96// Once for to prevent races 97var issueFullPatternOnce sync.Once 98 99func getIssueFullPattern() *regexp.Regexp { 100 issueFullPatternOnce.Do(func() { 101 // example: https://domain/org/repo/pulls/27#hash 102 issueFullPattern = regexp.MustCompile(regexp.QuoteMeta(setting.AppURL) + 103 `(?P<user>[\w_.-]+)\/(?P<repo>[\w_.-]+)\/(?:issues|pulls)\/(?P<num>(?:\w{1,10}-)?[1-9][0-9]*)(?P<subpath>\/[\w_.-]+)?(?:(?P<comment>#(?:issue|issuecomment)-\d+)|(?:[\?#](?:\S+)?))?\b`) 104 }) 105 return issueFullPattern 106} 107 108// CustomLinkURLSchemes allows for additional schemes to be detected when parsing links within text 109func CustomLinkURLSchemes(schemes []string) { 110 schemes = append(schemes, "http", "https") 111 withAuth := make([]string, 0, len(schemes)) 112 validScheme := regexp.MustCompile(`^[a-z]+$`) 113 for _, s := range schemes { 114 if !validScheme.MatchString(s) { 115 continue 116 } 117 without := false 118 for _, sna := range xurls.SchemesNoAuthority { 119 if s == sna { 120 without = true 121 break 122 } 123 } 124 if without { 125 s += ":" 126 } else { 127 s += "://" 128 } 129 withAuth = append(withAuth, s) 130 } 131 common.LinkRegex, _ = xurls.StrictMatchingScheme(strings.Join(withAuth, "|")) 132} 133 134type postProcessError struct { 135 context string 136 err error 137} 138 139func (p *postProcessError) Error() string { 140 return "PostProcess: " + p.context + ", " + p.err.Error() 141} 142 143type processor func(ctx *RenderContext, node *html.Node) 144 145var defaultProcessors = []processor{ 146 fullIssuePatternProcessor, 147 comparePatternProcessor, 148 filePreviewPatternProcessor, 149 fullHashPatternProcessor, 150 shortLinkProcessor, 151 linkProcessor, 152 mentionProcessor, 153 
issueIndexPatternProcessor, 154 commitCrossReferencePatternProcessor, 155 hashCurrentPatternProcessor, 156 emailAddressProcessor, 157 emojiProcessor, 158 emojiShortCodeProcessor, 159} 160 161// PostProcess does the final required transformations to the passed raw HTML 162// data, and ensures its validity. Transformations include: replacing links and 163// emails with HTML links, parsing shortlinks in the format of [[Link]], like 164// MediaWiki, linking issues in the format #ID, and mentions in the format 165// @user, and others. 166func PostProcess( 167 ctx *RenderContext, 168 input io.Reader, 169 output io.Writer, 170) error { 171 return postProcess(ctx, defaultProcessors, input, output) 172} 173 174var commitMessageProcessors = []processor{ 175 fullIssuePatternProcessor, 176 comparePatternProcessor, 177 fullHashPatternProcessor, 178 linkProcessor, 179 mentionProcessor, 180 issueIndexPatternProcessor, 181 commitCrossReferencePatternProcessor, 182 hashCurrentPatternProcessor, 183 emailAddressProcessor, 184 emojiProcessor, 185 emojiShortCodeProcessor, 186} 187 188// RenderCommitMessage will use the same logic as PostProcess, but will disable 189// the shortLinkProcessor and will add a defaultLinkProcessor if defaultLink is 190// set, which changes every text node into a link to the passed default link. 191func RenderCommitMessage( 192 ctx *RenderContext, 193 content string, 194) (string, error) { 195 procs := commitMessageProcessors 196 if ctx.DefaultLink != "" { 197 // we don't have to fear data races, because being 198 // commitMessageProcessors of fixed len and cap, every time we append 199 // something to it the slice is realloc+copied, so append always 200 // generates the slice ex-novo. 
201 procs = append(procs, genDefaultLinkProcessor(ctx.DefaultLink)) 202 } 203 return renderProcessString(ctx, procs, content) 204} 205 206var commitMessageSubjectProcessors = []processor{ 207 fullIssuePatternProcessor, 208 comparePatternProcessor, 209 fullHashPatternProcessor, 210 linkProcessor, 211 mentionProcessor, 212 issueIndexPatternProcessor, 213 commitCrossReferencePatternProcessor, 214 hashCurrentPatternProcessor, 215 emojiShortCodeProcessor, 216 emojiProcessor, 217} 218 219var emojiProcessors = []processor{ 220 emojiShortCodeProcessor, 221 emojiProcessor, 222} 223 224// RenderCommitMessageSubject will use the same logic as PostProcess and 225// RenderCommitMessage, but will disable the shortLinkProcessor and 226// emailAddressProcessor, will add a defaultLinkProcessor if defaultLink is set, 227// which changes every text node into a link to the passed default link. 228func RenderCommitMessageSubject( 229 ctx *RenderContext, 230 content string, 231) (string, error) { 232 procs := commitMessageSubjectProcessors 233 if ctx.DefaultLink != "" { 234 // we don't have to fear data races, because being 235 // commitMessageSubjectProcessors of fixed len and cap, every time we 236 // append something to it the slice is realloc+copied, so append always 237 // generates the slice ex-novo. 
238 procs = append(procs, genDefaultLinkProcessor(ctx.DefaultLink)) 239 } 240 return renderProcessString(ctx, procs, content) 241} 242 243// RenderIssueTitle to process title on individual issue/pull page 244func RenderIssueTitle( 245 ctx *RenderContext, 246 title string, 247) (string, error) { 248 return renderProcessString(ctx, []processor{ 249 inlineCodeBlockProcessor, 250 issueIndexPatternProcessor, 251 commitCrossReferencePatternProcessor, 252 hashCurrentPatternProcessor, 253 emojiShortCodeProcessor, 254 emojiProcessor, 255 }, title) 256} 257 258// RenderRefIssueTitle to process title on places where an issue is referenced 259func RenderRefIssueTitle( 260 ctx *RenderContext, 261 title string, 262) (string, error) { 263 return renderProcessString(ctx, []processor{ 264 inlineCodeBlockProcessor, 265 issueIndexPatternProcessor, 266 emojiShortCodeProcessor, 267 emojiProcessor, 268 }, title) 269} 270 271func renderProcessString(ctx *RenderContext, procs []processor, content string) (string, error) { 272 var buf strings.Builder 273 if err := postProcess(ctx, procs, strings.NewReader(content), &buf); err != nil { 274 return "", err 275 } 276 return buf.String(), nil 277} 278 279// RenderDescriptionHTML will use similar logic as PostProcess, but will 280// use a single special linkProcessor. 
281func RenderDescriptionHTML( 282 ctx *RenderContext, 283 content string, 284) (string, error) { 285 return renderProcessString(ctx, []processor{ 286 descriptionLinkProcessor, 287 emojiShortCodeProcessor, 288 emojiProcessor, 289 }, content) 290} 291 292// RenderEmoji for when we want to just process emoji and shortcodes 293// in various places it isn't already run through the normal markdown processor 294func RenderEmoji( 295 ctx *RenderContext, 296 content string, 297) (string, error) { 298 return renderProcessString(ctx, emojiProcessors, content) 299} 300 301var ( 302 tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL]\b)|(/?[hH][eE][aA][dD]\b))`) 303 nulCleaner = strings.NewReplacer("\000", "") 304) 305 306func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output io.Writer) error { 307 defer ctx.Cancel() 308 // FIXME: don't read all content to memory 309 rawHTML, err := io.ReadAll(input) 310 if err != nil { 311 return err 312 } 313 314 // parse the HTML 315 node, err := html.Parse(io.MultiReader( 316 // prepend "<html><body>" 317 strings.NewReader("<html><body>"), 318 // Strip out nuls - they're always invalid 319 bytes.NewReader(tagCleaner.ReplaceAll([]byte(nulCleaner.Replace(string(rawHTML))), []byte("&lt;$1"))), 320 // close the tags 321 strings.NewReader("</body></html>"), 322 )) 323 if err != nil { 324 return &postProcessError{"invalid HTML", err} 325 } 326 327 if node.Type == html.DocumentNode { 328 node = node.FirstChild 329 } 330 331 visitNode(ctx, procs, node) 332 333 newNodes := make([]*html.Node, 0, 5) 334 335 if node.Data == "html" { 336 node = node.FirstChild 337 for node != nil && node.Data != "body" { 338 node = node.NextSibling 339 } 340 } 341 if node != nil { 342 if node.Data == "body" { 343 child := node.FirstChild 344 for child != nil { 345 newNodes = append(newNodes, child) 346 child = child.NextSibling 347 } 348 } else { 349 newNodes = append(newNodes, node) 350 } 351 } 352 353 // Render 
everything to buf. 354 for _, node := range newNodes { 355 if err := html.Render(output, node); err != nil { 356 return &postProcessError{"error rendering processed HTML", err} 357 } 358 } 359 return nil 360} 361 362func visitNode(ctx *RenderContext, procs []processor, node *html.Node) { 363 // Add user-content- to IDs and "#" links if they don't already have them 364 for idx, attr := range node.Attr { 365 val := strings.TrimPrefix(attr.Val, "#") 366 notHasPrefix := !strings.HasPrefix(val, "user-content-") && !blackfridayExtRegex.MatchString(val) 367 368 if attr.Key == "id" && notHasPrefix { 369 node.Attr[idx].Val = "user-content-" + attr.Val 370 } 371 372 if attr.Key == "href" && strings.HasPrefix(attr.Val, "#") && notHasPrefix { 373 node.Attr[idx].Val = "#user-content-" + val 374 } 375 376 if attr.Key == "class" && attr.Val == "emoji" { 377 procs = nil 378 } 379 } 380 381 // We ignore code and pre. 382 switch node.Type { 383 case html.TextNode: 384 processTextNodes(ctx, procs, node) 385 case html.ElementNode: 386 if node.Data == "img" { 387 for i, attr := range node.Attr { 388 if attr.Key != "src" { 389 continue 390 } 391 if len(attr.Val) > 0 && !IsLinkStr(attr.Val) && !strings.HasPrefix(attr.Val, "data:image/") { 392 attr.Val = util.URLJoin(ctx.Links.ResolveMediaLink(ctx.IsWiki), attr.Val) 393 } 394 attr.Val = camoHandleLink(attr.Val) 395 node.Attr[i] = attr 396 } 397 } else if node.Data == "a" { 398 // Restrict text in links to emojis 399 procs = emojiProcessors 400 } else if node.Data == "code" || node.Data == "pre" { 401 return 402 } else if node.Data == "i" { 403 for _, attr := range node.Attr { 404 if attr.Key != "class" { 405 continue 406 } 407 classes := strings.Split(attr.Val, " ") 408 for i, class := range classes { 409 if class == "icon" { 410 classes[0], classes[i] = classes[i], classes[0] 411 attr.Val = strings.Join(classes, " ") 412 413 // Remove all children of icons 414 child := node.FirstChild 415 for child != nil { 416 node.RemoveChild(child) 
417 child = node.FirstChild 418 } 419 break 420 } 421 } 422 } 423 } 424 for n := node.FirstChild; n != nil; n = n.NextSibling { 425 visitNode(ctx, procs, n) 426 } 427 default: 428 } 429 // ignore everything else 430} 431 432// processTextNodes runs the passed node through various processors, in order to handle 433// all kinds of special links handled by the post-processing. 434func processTextNodes(ctx *RenderContext, procs []processor, node *html.Node) { 435 for _, p := range procs { 436 p(ctx, node) 437 } 438} 439 440// createKeyword() renders a highlighted version of an action keyword 441func createKeyword(content string) *html.Node { 442 span := &html.Node{ 443 Type: html.ElementNode, 444 Data: atom.Span.String(), 445 Attr: []html.Attribute{}, 446 } 447 span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: keywordClass}) 448 449 text := &html.Node{ 450 Type: html.TextNode, 451 Data: content, 452 } 453 span.AppendChild(text) 454 455 return span 456} 457 458func createInlineCode(content string) *html.Node { 459 code := &html.Node{ 460 Type: html.ElementNode, 461 Data: atom.Code.String(), 462 Attr: []html.Attribute{}, 463 } 464 465 code.Attr = append(code.Attr, html.Attribute{Key: "class", Val: "inline-code-block"}) 466 467 text := &html.Node{ 468 Type: html.TextNode, 469 Data: content, 470 } 471 472 code.AppendChild(text) 473 return code 474} 475 476func createEmoji(content, class, name, alias string) *html.Node { 477 span := &html.Node{ 478 Type: html.ElementNode, 479 Data: atom.Span.String(), 480 Attr: []html.Attribute{}, 481 } 482 if class != "" { 483 span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: class}) 484 } 485 if name != "" { 486 span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: name}) 487 } 488 if alias != "" { 489 span.Attr = append(span.Attr, html.Attribute{Key: "data-alias", Val: alias}) 490 } 491 492 text := &html.Node{ 493 Type: html.TextNode, 494 Data: content, 495 } 496 497 span.AppendChild(text) 498 
return span 499} 500 501func createCustomEmoji(alias string) *html.Node { 502 span := &html.Node{ 503 Type: html.ElementNode, 504 Data: atom.Span.String(), 505 Attr: []html.Attribute{}, 506 } 507 span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: "emoji"}) 508 span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: alias}) 509 span.Attr = append(span.Attr, html.Attribute{Key: "data-alias", Val: alias}) 510 511 img := &html.Node{ 512 Type: html.ElementNode, 513 DataAtom: atom.Img, 514 Data: "img", 515 Attr: []html.Attribute{}, 516 } 517 img.Attr = append(img.Attr, html.Attribute{Key: "alt", Val: ":" + alias + ":"}) 518 img.Attr = append(img.Attr, html.Attribute{Key: "src", Val: setting.StaticURLPrefix + "/assets/img/emoji/" + alias + ".png"}) 519 520 span.AppendChild(img) 521 return span 522} 523 524func createLink(href, content, class string) *html.Node { 525 a := &html.Node{ 526 Type: html.ElementNode, 527 Data: atom.A.String(), 528 Attr: []html.Attribute{{Key: "href", Val: href}}, 529 } 530 531 if class != "" { 532 a.Attr = append(a.Attr, html.Attribute{Key: "class", Val: class}) 533 } 534 535 text := &html.Node{ 536 Type: html.TextNode, 537 Data: content, 538 } 539 540 a.AppendChild(text) 541 return a 542} 543 544func createCodeLink(href, content, class string) *html.Node { 545 a := &html.Node{ 546 Type: html.ElementNode, 547 Data: atom.A.String(), 548 Attr: []html.Attribute{{Key: "href", Val: href}}, 549 } 550 551 if class != "" { 552 a.Attr = append(a.Attr, html.Attribute{Key: "class", Val: class}) 553 } 554 555 text := &html.Node{ 556 Type: html.TextNode, 557 Data: content, 558 } 559 560 code := &html.Node{ 561 Type: html.ElementNode, 562 Data: atom.Code.String(), 563 Attr: []html.Attribute{{Key: "class", Val: "nohighlight"}}, 564 } 565 566 code.AppendChild(text) 567 a.AppendChild(code) 568 return a 569} 570 571// replaceContent takes text node, and in its content it replaces a section of 572// it with the specified newNode. 
573func replaceContent(node *html.Node, i, j int, newNode *html.Node) { 574 replaceContentList(node, i, j, []*html.Node{newNode}) 575} 576 577// replaceContentList takes text node, and in its content it replaces a section of 578// it with the specified newNodes. An example to visualize how this can work can 579// be found here: https://play.golang.org/p/5zP8NnHZ03s 580func replaceContentList(node *html.Node, i, j int, newNodes []*html.Node) { 581 // get the data before and after the match 582 before := node.Data[:i] 583 after := node.Data[j:] 584 585 // Replace in the current node the text, so that it is only what it is 586 // supposed to have. 587 node.Data = before 588 589 // Get the current next sibling, before which we place the replaced data, 590 // and after that we place the new text node. 591 nextSibling := node.NextSibling 592 for _, n := range newNodes { 593 node.Parent.InsertBefore(n, nextSibling) 594 } 595 if after != "" { 596 node.Parent.InsertBefore(&html.Node{ 597 Type: html.TextNode, 598 Data: after, 599 }, nextSibling) 600 } 601} 602 603func mentionProcessor(ctx *RenderContext, node *html.Node) { 604 start := 0 605 next := node.NextSibling 606 for node != nil && node != next && start < len(node.Data) { 607 // We replace only the first mention; other mentions will be addressed later 608 found, loc := references.FindFirstMentionBytes([]byte(node.Data[start:])) 609 if !found { 610 return 611 } 612 loc.Start += start 613 loc.End += start 614 mention := node.Data[loc.Start:loc.End] 615 var teams string 616 teams, ok := ctx.Metas["teams"] 617 // FIXME: util.URLJoin may not be necessary here: 618 // - setting.AppURL is defined to have a terminal '/' so unless mention[1:] 619 // is an AppSubURL link we can probably fallback to concatenation. 
620 // team mention should follow @orgName/teamName style 621 if ok && strings.Contains(mention, "/") { 622 mentionOrgAndTeam := strings.Split(mention, "/") 623 if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") { 624 replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(ctx.Links.Prefix(), "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention")) 625 node = node.NextSibling.NextSibling 626 start = 0 627 continue 628 } 629 start = loc.End 630 continue 631 } 632 mentionedUsername := mention[1:] 633 634 if DefaultProcessorHelper.IsUsernameMentionable != nil && DefaultProcessorHelper.IsUsernameMentionable(ctx.Ctx, mentionedUsername) { 635 replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(ctx.Links.Prefix(), mentionedUsername), mention, "mention")) 636 node = node.NextSibling.NextSibling 637 start = 0 638 } else { 639 start = loc.End 640 } 641 } 642} 643 644func shortLinkProcessor(ctx *RenderContext, node *html.Node) { 645 next := node.NextSibling 646 for node != nil && node != next { 647 m := shortLinkPattern.FindStringSubmatchIndex(node.Data) 648 if m == nil { 649 return 650 } 651 652 content := node.Data[m[2]:m[3]] 653 tail := node.Data[m[4]:m[5]] 654 props := make(map[string]string) 655 656 // MediaWiki uses [[link|text]], while GitHub uses [[text|link]] 657 // It makes page handling terrible, but we prefer GitHub syntax 658 // And fall back to MediaWiki only when it is obvious from the look 659 // Of text and link contents 660 sl := strings.Split(content, "|") 661 for _, v := range sl { 662 if equalPos := strings.IndexByte(v, '='); equalPos == -1 { 663 // There is no equal in this argument; this is a mandatory arg 664 if props["name"] == "" { 665 if IsLinkStr(v) { 666 // If we clearly see it is a link, we save it so 667 668 // But first we need to ensure, that if both mandatory args provided 669 // look like links, we stick to GitHub syntax 670 if 
props["link"] != "" { 671 props["name"] = props["link"] 672 } 673 674 props["link"] = strings.TrimSpace(v) 675 } else { 676 props["name"] = v 677 } 678 } else { 679 props["link"] = strings.TrimSpace(v) 680 } 681 } else { 682 // There is an equal; optional argument. 683 684 sep := strings.IndexByte(v, '=') 685 key, val := v[:sep], html.UnescapeString(v[sep+1:]) 686 687 // When parsing HTML, x/net/html will change all quotes which are 688 // not used for syntax into UTF-8 quotes. So checking val[0] won't 689 // be enough, since that only checks a single byte. 690 if len(val) > 1 { 691 if (strings.HasPrefix(val, "“") && strings.HasSuffix(val, "”")) || 692 (strings.HasPrefix(val, "‘") && strings.HasSuffix(val, "’")) { 693 const lenQuote = len("‘") 694 val = val[lenQuote : len(val)-lenQuote] 695 } else if (strings.HasPrefix(val, "\"") && strings.HasSuffix(val, "\"")) || 696 (strings.HasPrefix(val, "'") && strings.HasSuffix(val, "'")) { 697 val = val[1 : len(val)-1] 698 } else if strings.HasPrefix(val, "'") && strings.HasSuffix(val, "’") { 699 const lenQuote = len("‘") 700 val = val[1 : len(val)-lenQuote] 701 } 702 } 703 props[key] = val 704 } 705 } 706 707 var name, link string 708 if props["link"] != "" { 709 link = props["link"] 710 } else if props["name"] != "" { 711 link = props["name"] 712 } 713 if props["title"] != "" { 714 name = props["title"] 715 } else if props["name"] != "" { 716 name = props["name"] 717 } else { 718 name = link 719 } 720 721 name += tail 722 image := false 723 switch ext := filepath.Ext(link); ext { 724 // fast path: empty string, ignore 725 case "": 726 // leave image as false 727 case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg": 728 image = true 729 } 730 731 childNode := &html.Node{} 732 linkNode := &html.Node{ 733 FirstChild: childNode, 734 LastChild: childNode, 735 Type: html.ElementNode, 736 Data: "a", 737 DataAtom: atom.A, 738 } 739 childNode.Parent = linkNode 740 absoluteLink := IsLinkStr(link) 
741 if !absoluteLink { 742 if image { 743 link = strings.ReplaceAll(link, " ", "+") 744 } else { 745 link = strings.ReplaceAll(link, " ", "-") 746 } 747 if !strings.Contains(link, "/") { 748 link = url.PathEscape(link) 749 } 750 } 751 if image { 752 if !absoluteLink { 753 link = util.URLJoin(ctx.Links.ResolveMediaLink(ctx.IsWiki), link) 754 } 755 title := props["title"] 756 if title == "" { 757 title = props["alt"] 758 } 759 if title == "" { 760 title = path.Base(name) 761 } 762 alt := props["alt"] 763 764 // make the childNode an image - if we can, we also place the alt 765 childNode.Type = html.ElementNode 766 childNode.Data = "img" 767 childNode.DataAtom = atom.Img 768 childNode.Attr = []html.Attribute{ 769 {Key: "src", Val: link}, 770 {Key: "title", Val: title}, 771 {Key: "alt", Val: alt}, 772 } 773 } else { 774 if !absoluteLink { 775 if ctx.IsWiki { 776 link = util.URLJoin(ctx.Links.WikiLink(), link) 777 } else { 778 link = util.URLJoin(ctx.Links.SrcLink(), link) 779 } 780 } 781 childNode.Type = html.TextNode 782 childNode.Data = name 783 } 784 linkNode.Attr = []html.Attribute{{Key: "href", Val: link}} 785 replaceContent(node, m[0], m[1], linkNode) 786 node = node.NextSibling.NextSibling 787 } 788} 789 790func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) { 791 if ctx.Metas == nil { 792 return 793 } 794 next := node.NextSibling 795 for node != nil && node != next { 796 re := getIssueFullPattern() 797 linkIndex, m := re.FindStringIndex(node.Data), re.FindStringSubmatch(node.Data) 798 if linkIndex == nil || m == nil { 799 return 800 } 801 802 link := node.Data[linkIndex[0]:linkIndex[1]] 803 text := "#" + m[re.SubexpIndex("num")] + m[re.SubexpIndex("subpath")] 804 805 if len(m[re.SubexpIndex("comment")]) > 0 { 806 if locale, ok := ctx.Ctx.Value(translation.ContextKey).(translation.Locale); ok { 807 text += " " + locale.TrString("repo.from_comment") 808 } else { 809 text += " (comment)" 810 } 811 } 812 813 matchUser := m[re.SubexpIndex("user")] 
814 matchRepo := m[re.SubexpIndex("repo")] 815 816 if matchUser == ctx.Metas["user"] && matchRepo == ctx.Metas["repo"] { 817 replaceContent(node, linkIndex[0], linkIndex[1], createLink(link, text, "ref-issue")) 818 } else { 819 text = matchUser + "/" + matchRepo + text 820 replaceContent(node, linkIndex[0], linkIndex[1], createLink(link, text, "ref-issue")) 821 } 822 node = node.NextSibling.NextSibling 823 } 824} 825 826func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) { 827 if ctx.Metas == nil { 828 return 829 } 830 831 // FIXME: the use of "mode" is quite dirty and hacky, for example: what is a "document"? how should it be rendered? 832 // The "mode" approach should be refactored to some other more clear&reliable way. 833 crossLinkOnly := (ctx.Metas["mode"] == "document" && !ctx.IsWiki) 834 835 var ( 836 found bool 837 ref *references.RenderizableReference 838 ) 839 840 next := node.NextSibling 841 842 for node != nil && node != next { 843 _, hasExtTrackFormat := ctx.Metas["format"] 844 845 // Repos with external issue trackers might still need to reference local PRs 846 // We need to concern with the first one that shows up in the text, whichever it is 847 isNumericStyle := ctx.Metas["style"] == "" || ctx.Metas["style"] == IssueNameStyleNumeric 848 foundNumeric, refNumeric := references.FindRenderizableReferenceNumeric(node.Data, hasExtTrackFormat && !isNumericStyle, crossLinkOnly) 849 850 switch ctx.Metas["style"] { 851 case "", IssueNameStyleNumeric: 852 found, ref = foundNumeric, refNumeric 853 case IssueNameStyleAlphanumeric: 854 found, ref = references.FindRenderizableReferenceAlphanumeric(node.Data) 855 case IssueNameStyleRegexp: 856 pattern, err := regexplru.GetCompiled(ctx.Metas["regexp"]) 857 if err != nil { 858 return 859 } 860 found, ref = references.FindRenderizableReferenceRegexp(node.Data, pattern) 861 } 862 863 // Repos with external issue trackers might still need to reference local PRs 864 // We need to concern with the first 
one that shows up in the text, whichever it is 865 if hasExtTrackFormat && !isNumericStyle && refNumeric != nil { 866 // If numeric (PR) was found, and it was BEFORE the non-numeric pattern, use that 867 // Allow a free-pass when non-numeric pattern wasn't found. 868 if found && (ref == nil || refNumeric.RefLocation.Start < ref.RefLocation.Start) { 869 found = foundNumeric 870 ref = refNumeric 871 } 872 } 873 if !found { 874 return 875 } 876 877 var link *html.Node 878 reftext := node.Data[ref.RefLocation.Start:ref.RefLocation.End] 879 if hasExtTrackFormat && !ref.IsPull && ref.Owner == "" { 880 ctx.Metas["index"] = ref.Issue 881 882 res, err := vars.Expand(ctx.Metas["format"], ctx.Metas) 883 if err != nil { 884 // here we could just log the error and continue the rendering 885 log.Error("unable to expand template vars for ref %s, err: %v", ref.Issue, err) 886 } 887 888 link = createLink(res, reftext, "ref-issue ref-external-issue") 889 } else { 890 // Path determines the type of link that will be rendered. It's unknown at this point whether 891 // the linked item is actually a PR or an issue. Luckily it's of no real consequence because 892 // Forgejo will redirect on click as appropriate. 
893 path := "issues" 894 if ref.IsPull { 895 path = "pulls" 896 } 897 if ref.Owner == "" { 898 link = createLink(util.URLJoin(ctx.Links.Prefix(), ctx.Metas["user"], ctx.Metas["repo"], path, ref.Issue), reftext, "ref-issue") 899 } else { 900 link = createLink(util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, path, ref.Issue), reftext, "ref-issue") 901 } 902 } 903 904 if ref.Action == references.XRefActionNone { 905 replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link) 906 node = node.NextSibling.NextSibling 907 continue 908 } 909 910 // Decorate action keywords if actionable 911 var keyword *html.Node 912 if references.IsXrefActionable(ref, hasExtTrackFormat) { 913 keyword = createKeyword(node.Data[ref.ActionLocation.Start:ref.ActionLocation.End]) 914 } else { 915 keyword = &html.Node{ 916 Type: html.TextNode, 917 Data: node.Data[ref.ActionLocation.Start:ref.ActionLocation.End], 918 } 919 } 920 spaces := &html.Node{ 921 Type: html.TextNode, 922 Data: node.Data[ref.ActionLocation.End:ref.RefLocation.Start], 923 } 924 replaceContentList(node, ref.ActionLocation.Start, ref.RefLocation.End, []*html.Node{keyword, spaces, link}) 925 node = node.NextSibling.NextSibling.NextSibling.NextSibling 926 } 927} 928 929func commitCrossReferencePatternProcessor(ctx *RenderContext, node *html.Node) { 930 next := node.NextSibling 931 932 for node != nil && node != next { 933 found, ref := references.FindRenderizableCommitCrossReference(node.Data) 934 if !found { 935 return 936 } 937 938 reftext := ref.Owner + "/" + ref.Name + "@" + base.ShortSha(ref.CommitSha) 939 link := createLink(util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, "commit", ref.CommitSha), reftext, "commit") 940 941 replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link) 942 node = node.NextSibling.NextSibling 943 } 944} 945 946// fullHashPatternProcessor renders SHA containing URLs 947func fullHashPatternProcessor(ctx *RenderContext, node *html.Node) { 948 if ctx.Metas == nil 
{ 949 return 950 } 951 952 next := node.NextSibling 953 for node != nil && node != next { 954 m := anyHashPattern.FindStringSubmatchIndex(node.Data) 955 if m == nil { 956 return 957 } 958 959 urlFull := node.Data[m[0]:m[1]] 960 text := base.ShortSha(node.Data[m[2]:m[3]]) 961 962 // 3rd capture group matches a optional path 963 subpath := "" 964 if m[5] > 0 { 965 subpath = node.Data[m[4]:m[5]] 966 } 967 968 // 5th capture group matches a optional url hash 969 hash := "" 970 if m[9] > 0 { 971 hash = node.Data[m[8]:m[9]][1:] 972 } 973 974 start := m[0] 975 end := m[1] 976 977 // If url ends in '.', it's very likely that it is not part of the 978 // actual url but used to finish a sentence. 979 if strings.HasSuffix(urlFull, ".") { 980 end-- 981 urlFull = urlFull[:len(urlFull)-1] 982 if hash != "" { 983 hash = hash[:len(hash)-1] 984 } else if subpath != "" { 985 subpath = subpath[:len(subpath)-1] 986 } 987 } 988 989 if subpath != "" { 990 text += subpath 991 } 992 993 if hash != "" { 994 text += " (" + hash + ")" 995 } 996 replaceContent(node, start, end, createCodeLink(urlFull, text, "commit")) 997 node = node.NextSibling.NextSibling 998 } 999} 1000 1001func comparePatternProcessor(ctx *RenderContext, node *html.Node) { 1002 if ctx.Metas == nil { 1003 return 1004 } 1005 1006 next := node.NextSibling 1007 for node != nil && node != next { 1008 m := comparePattern.FindStringSubmatchIndex(node.Data) 1009 if m == nil { 1010 return 1011 } 1012 1013 // Ensure that every group (m[0]...m[7]) has a match 1014 for i := 0; i < 8; i++ { 1015 if m[i] == -1 { 1016 return 1017 } 1018 } 1019 1020 urlFull := node.Data[m[0]:m[1]] 1021 text1 := base.ShortSha(node.Data[m[2]:m[3]]) 1022 textDots := base.ShortSha(node.Data[m[4]:m[5]]) 1023 text2 := base.ShortSha(node.Data[m[6]:m[7]]) 1024 1025 hash := "" 1026 if m[9] > 0 { 1027 hash = node.Data[m[8]:m[9]][1:] 1028 } 1029 1030 start := m[0] 1031 end := m[1] 1032 1033 // If url ends in '.', it's very likely that it is not part of the 1034 // 
actual url but used to finish a sentence. 1035 if strings.HasSuffix(urlFull, ".") { 1036 end-- 1037 urlFull = urlFull[:len(urlFull)-1] 1038 if hash != "" { 1039 hash = hash[:len(hash)-1] 1040 } else if text2 != "" { 1041 text2 = text2[:len(text2)-1] 1042 } 1043 } 1044 1045 text := text1 + textDots + text2 1046 if hash != "" { 1047 text += " (" + hash + ")" 1048 } 1049 replaceContent(node, start, end, createCodeLink(urlFull, text, "compare")) 1050 node = node.NextSibling.NextSibling 1051 } 1052} 1053 1054func filePreviewPatternProcessor(ctx *RenderContext, node *html.Node) { 1055 if ctx.Metas == nil || ctx.Metas["user"] == "" || ctx.Metas["repo"] == "" { 1056 return 1057 } 1058 if DefaultProcessorHelper.GetRepoFileBlob == nil { 1059 return 1060 } 1061 1062 locale := translation.NewLocale("en-US") 1063 if ctx.Ctx != nil { 1064 ctxLocale, ok := ctx.Ctx.Value(translation.ContextKey).(translation.Locale) 1065 if ok { 1066 locale = ctxLocale 1067 } 1068 } 1069 1070 next := node.NextSibling 1071 for node != nil && node != next { 1072 previews := NewFilePreviews(ctx, node, locale) 1073 if previews == nil { 1074 node = node.NextSibling 1075 continue 1076 } 1077 1078 offset := 0 1079 for _, preview := range previews { 1080 previewNode := preview.CreateHTML(locale) 1081 1082 // Specialized version of replaceContent, so the parent paragraph element is not destroyed from our div 1083 before := node.Data[:(preview.start - offset)] 1084 after := node.Data[(preview.end - offset):] 1085 afterPrefix := "<p>" 1086 offset = preview.end - len(afterPrefix) 1087 node.Data = before 1088 nextSibling := node.NextSibling 1089 node.Parent.InsertBefore(&html.Node{ 1090 Type: html.RawNode, 1091 Data: "</p>", 1092 }, nextSibling) 1093 node.Parent.InsertBefore(previewNode, nextSibling) 1094 afterNode := &html.Node{ 1095 Type: html.RawNode, 1096 Data: afterPrefix + after, 1097 } 1098 node.Parent.InsertBefore(afterNode, nextSibling) 1099 node = afterNode 1100 } 1101 1102 node = node.NextSibling 
1103 } 1104} 1105 1106func inlineCodeBlockProcessor(ctx *RenderContext, node *html.Node) { 1107 start := 0 1108 next := node.NextSibling 1109 for node != nil && node != next && start < len(node.Data) { 1110 m := InlineCodeBlockRegex.FindStringSubmatchIndex(node.Data[start:]) 1111 if m == nil { 1112 return 1113 } 1114 1115 code := node.Data[m[0]+1 : m[1]-1] 1116 replaceContent(node, m[0], m[1], createInlineCode(code)) 1117 node = node.NextSibling.NextSibling 1118 } 1119} 1120 1121// emojiShortCodeProcessor for rendering text like :smile: into emoji 1122func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) { 1123 start := 0 1124 next := node.NextSibling 1125 for node != nil && node != next && start < len(node.Data) { 1126 m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:]) 1127 if m == nil { 1128 return 1129 } 1130 m[0] += start 1131 m[1] += start 1132 1133 start = m[1] 1134 1135 alias := node.Data[m[0]:m[1]] 1136 alias = strings.ReplaceAll(alias, ":", "") 1137 converted := emoji.FromAlias(alias) 1138 if converted == nil { 1139 // check if this is a custom reaction 1140 if _, exist := setting.UI.CustomEmojisMap[alias]; exist { 1141 replaceContent(node, m[0], m[1], createCustomEmoji(alias)) 1142 node = node.NextSibling.NextSibling 1143 start = 0 1144 continue 1145 } 1146 continue 1147 } 1148 1149 replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description, alias)) 1150 node = node.NextSibling.NextSibling 1151 start = 0 1152 } 1153} 1154 1155// emoji processor to match emoji and add emoji class 1156func emojiProcessor(ctx *RenderContext, node *html.Node) { 1157 start := 0 1158 next := node.NextSibling 1159 for node != nil && node != next && start < len(node.Data) { 1160 m := emoji.FindEmojiSubmatchIndex(node.Data[start:]) 1161 if m == nil { 1162 return 1163 } 1164 m[0] += start 1165 m[1] += start 1166 1167 codepoint := node.Data[m[0]:m[1]] 1168 start = m[1] 1169 val := emoji.FromCode(codepoint) 1170 if val != 
nil { 1171 replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description, val.Aliases[0])) 1172 node = node.NextSibling.NextSibling 1173 start = 0 1174 } 1175 } 1176} 1177 1178// hashCurrentPatternProcessor renders SHA1/SHA256 strings to corresponding links that 1179// are assumed to be in the same repository. 1180func hashCurrentPatternProcessor(ctx *RenderContext, node *html.Node) { 1181 if ctx.Metas == nil || ctx.Metas["user"] == "" || ctx.Metas["repo"] == "" || ctx.Metas["repoPath"] == "" { 1182 return 1183 } 1184 1185 start := 0 1186 next := node.NextSibling 1187 if ctx.ShaExistCache == nil { 1188 ctx.ShaExistCache = make(map[string]bool) 1189 } 1190 for node != nil && node != next && start < len(node.Data) { 1191 m := hashCurrentPattern.FindStringSubmatchIndex(node.Data[start:]) 1192 if m == nil { 1193 return 1194 } 1195 m[2] += start 1196 m[3] += start 1197 1198 hash := node.Data[m[2]:m[3]] 1199 // The regex does not lie, it matches the hash pattern. 1200 // However, a regex cannot know if a hash actually exists or not. 1201 // We could assume that a SHA1 hash should probably contain alphas AND numerics 1202 // but that is not always the case. 1203 // Although unlikely, deadbeef and 1234567 are valid short forms of SHA1 hash 1204 // as used by git and github for linking and thus we have to do similar. 1205 // Because of this, we check to make sure that a matched hash is actually 1206 // a commit in the repository before making it a link. 
1207 1208 // check cache first 1209 exist, inCache := ctx.ShaExistCache[hash] 1210 if !inCache { 1211 if ctx.GitRepo == nil { 1212 var err error 1213 ctx.GitRepo, err = git.OpenRepository(ctx.Ctx, ctx.Metas["repoPath"]) 1214 if err != nil { 1215 log.Error("unable to open repository: %s Error: %v", ctx.Metas["repoPath"], err) 1216 return 1217 } 1218 ctx.AddCancel(func() { 1219 ctx.GitRepo.Close() 1220 ctx.GitRepo = nil 1221 }) 1222 } 1223 1224 exist = ctx.GitRepo.IsReferenceExist(hash) 1225 ctx.ShaExistCache[hash] = exist 1226 } 1227 1228 if !exist { 1229 start = m[3] 1230 continue 1231 } 1232 1233 link := util.URLJoin(ctx.Links.Prefix(), ctx.Metas["user"], ctx.Metas["repo"], "commit", hash) 1234 replaceContent(node, m[2], m[3], createCodeLink(link, base.ShortSha(hash), "commit")) 1235 start = 0 1236 node = node.NextSibling.NextSibling 1237 } 1238} 1239 1240// emailAddressProcessor replaces raw email addresses with a mailto: link. 1241func emailAddressProcessor(ctx *RenderContext, node *html.Node) { 1242 next := node.NextSibling 1243 for node != nil && node != next { 1244 m := emailRegex.FindStringSubmatchIndex(node.Data) 1245 if m == nil { 1246 return 1247 } 1248 1249 mail := node.Data[m[2]:m[3]] 1250 replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto")) 1251 node = node.NextSibling.NextSibling 1252 } 1253} 1254 1255// linkProcessor creates links for any HTTP or HTTPS URL not captured by 1256// markdown. 
1257func linkProcessor(ctx *RenderContext, node *html.Node) { 1258 next := node.NextSibling 1259 for node != nil && node != next { 1260 m := common.LinkRegex.FindStringIndex(node.Data) 1261 if m == nil { 1262 return 1263 } 1264 1265 uri := node.Data[m[0]:m[1]] 1266 replaceContent(node, m[0], m[1], createLink(uri, uri, "link")) 1267 node = node.NextSibling.NextSibling 1268 } 1269} 1270 1271func genDefaultLinkProcessor(defaultLink string) processor { 1272 return func(ctx *RenderContext, node *html.Node) { 1273 ch := &html.Node{ 1274 Parent: node, 1275 Type: html.TextNode, 1276 Data: node.Data, 1277 } 1278 1279 node.Type = html.ElementNode 1280 node.Data = "a" 1281 node.DataAtom = atom.A 1282 node.Attr = []html.Attribute{ 1283 {Key: "href", Val: defaultLink}, 1284 {Key: "class", Val: "default-link muted"}, 1285 } 1286 node.FirstChild, node.LastChild = ch, ch 1287 } 1288} 1289 1290// descriptionLinkProcessor creates links for DescriptionHTML 1291func descriptionLinkProcessor(ctx *RenderContext, node *html.Node) { 1292 next := node.NextSibling 1293 for node != nil && node != next { 1294 m := common.LinkRegex.FindStringIndex(node.Data) 1295 if m == nil { 1296 return 1297 } 1298 1299 uri := node.Data[m[0]:m[1]] 1300 replaceContent(node, m[0], m[1], createDescriptionLink(uri, uri)) 1301 node = node.NextSibling.NextSibling 1302 } 1303} 1304 1305func createDescriptionLink(href, content string) *html.Node { 1306 textNode := &html.Node{ 1307 Type: html.TextNode, 1308 Data: content, 1309 } 1310 linkNode := &html.Node{ 1311 FirstChild: textNode, 1312 LastChild: textNode, 1313 Type: html.ElementNode, 1314 Data: "a", 1315 DataAtom: atom.A, 1316 Attr: []html.Attribute{ 1317 {Key: "href", Val: href}, 1318 {Key: "target", Val: "_blank"}, 1319 {Key: "rel", Val: "noopener noreferrer"}, 1320 }, 1321 } 1322 textNode.Parent = linkNode 1323 return linkNode 1324}