loading up the forgejo repo on tangled to test page performance
1// Copyright 2017 The Gitea Authors. All rights reserved.
2// Copyright 2025 The Forgejo Authors.
3// SPDX-License-Identifier: MIT
4
5package markup
6
7import (
8 "bytes"
9 "io"
10 "net/url"
11 "path"
12 "path/filepath"
13 "regexp"
14 "strings"
15 "sync"
16
17 "forgejo.org/modules/base"
18 "forgejo.org/modules/emoji"
19 "forgejo.org/modules/git"
20 "forgejo.org/modules/log"
21 "forgejo.org/modules/markup/common"
22 "forgejo.org/modules/references"
23 "forgejo.org/modules/regexplru"
24 "forgejo.org/modules/setting"
25 "forgejo.org/modules/templates/vars"
26 "forgejo.org/modules/translation"
27 "forgejo.org/modules/util"
28
29 "golang.org/x/net/html"
30 "golang.org/x/net/html/atom"
31 "mvdan.cc/xurls/v2"
32)
33
// Issue name styles. issueIndexPatternProcessor compares these against the
// "style" entry of RenderContext.Metas to decide how issue references are
// looked up in text.
const (
	// IssueNameStyleNumeric selects numeric references; an empty "style"
	// entry is treated the same way.
	IssueNameStyleNumeric = "numeric"
	// IssueNameStyleAlphanumeric selects alphanumeric references.
	IssueNameStyleAlphanumeric = "alphanumeric"
	// IssueNameStyleRegexp selects references matched by the pattern stored
	// in the "regexp" entry of RenderContext.Metas.
	IssueNameStyleRegexp = "regexp"
)
40
var (
	// NOTE: None of the regular expressions below perform any extra validation.
	// Thus a link is produced even if the linked entity does not exist.
	// While fast, this is also incorrect and leads to false positives.
	// TODO: fix invalid linking issue

	// valid chars in encoded path and parameter: [-+~_%.a-zA-Z0-9/]

	// hashCurrentPattern matches a string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae
	// Although SHA1 hashes are 40 chars long and SHA256 hashes are 64, the regex matches hashes from 7 to 64 chars
	// in length so that abbreviated hash links can be used as well. This matches git and GitHub usability.
	hashCurrentPattern = regexp.MustCompile(`(?:^|\s)[^\w\d]{0,2}([0-9a-f]{7,64})[^\w\d]{0,2}(?:\s|$)`)

	// shortLinkPattern matches the short but difficult to parse [[name|link|arg=test]] syntax.
	// Group 1 is the text between the brackets, group 2 any word characters directly after "]]".
	shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)

	// anyHashPattern splits a URL containing a SHA into parts:
	// group 1 is the SHA, group 2 an optional path, group 3 an optional query
	// string and group 4 an optional fragment.
	anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(/[-+~_%.a-zA-Z0-9/]+)?(\?[-+~_%\.a-zA-Z0-9=&]+)?(#[-+~_%.a-zA-Z0-9]+)?`)

	// comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash":
	// group 1 is COMMIT1, group 2 the dots, group 3 the optional COMMIT2 and
	// group 4 the optional fragment.
	comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`)

	// validLinksPattern matches a leading URL scheme followed by "://": a
	// lowercase letter, then one or more word characters or dashes.
	validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)

	// While this email regex is definitely not perfect and I'm sure you can come up
	// with edge cases, it is still accepted by the CommonMark specification, as
	// well as the HTML5 spec:
	//   http://spec.commonmark.org/0.28/#email-address
	//   https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
	emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|;|,|\\?|!|\\.(\\s|$))")

	// blackfriday extensions create IDs like fn:user-content-footnote
	blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`)

	// EmojiShortCodeRegex finds emoji by alias like :smile:
	EmojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`)

	// InlineCodeBlockRegex matches a backtick-delimited span that contains no
	// backtick itself.
	InlineCodeBlockRegex = regexp.MustCompile("`[^`]+`")
)
80
// keywordClass is the CSS class for action keywords (e.g. "closes: #1"); it is
// set on the <span> built by createKeyword.
const keywordClass = "issue-keyword"
83
84// IsLink reports whether link fits valid format.
85func IsLink(link []byte) bool {
86 return validLinksPattern.Match(link)
87}
88
89func IsLinkStr(link string) bool {
90 return validLinksPattern.MatchString(link)
91}
92
93// regexp for full links to issues/pulls
94var issueFullPattern *regexp.Regexp
95
96// Once for to prevent races
97var issueFullPatternOnce sync.Once
98
99func getIssueFullPattern() *regexp.Regexp {
100 issueFullPatternOnce.Do(func() {
101 // example: https://domain/org/repo/pulls/27#hash
102 issueFullPattern = regexp.MustCompile(regexp.QuoteMeta(setting.AppURL) +
103 `(?P<user>[\w_.-]+)\/(?P<repo>[\w_.-]+)\/(?:issues|pulls)\/(?P<num>(?:\w{1,10}-)?[1-9][0-9]*)(?P<subpath>\/[\w_.-]+)?(?:(?P<comment>#(?:issue|issuecomment)-\d+)|(?:[\?#](?:\S+)?))?\b`)
104 })
105 return issueFullPattern
106}
107
108// CustomLinkURLSchemes allows for additional schemes to be detected when parsing links within text
109func CustomLinkURLSchemes(schemes []string) {
110 schemes = append(schemes, "http", "https")
111 withAuth := make([]string, 0, len(schemes))
112 validScheme := regexp.MustCompile(`^[a-z]+$`)
113 for _, s := range schemes {
114 if !validScheme.MatchString(s) {
115 continue
116 }
117 without := false
118 for _, sna := range xurls.SchemesNoAuthority {
119 if s == sna {
120 without = true
121 break
122 }
123 }
124 if without {
125 s += ":"
126 } else {
127 s += "://"
128 }
129 withAuth = append(withAuth, s)
130 }
131 common.LinkRegex, _ = xurls.StrictMatchingScheme(strings.Join(withAuth, "|"))
132}
133
// postProcessError is returned by postProcess when a step fails. It records
// which step failed (context) together with the underlying error.
type postProcessError struct {
	context string
	err     error
}

// Error implements the error interface. The message has the form
// "PostProcess: <context>, <underlying error>".
func (p *postProcessError) Error() string {
	return "PostProcess: " + p.context + ", " + p.err.Error()
}

// Unwrap returns the underlying error so that errors.Is and errors.As can
// inspect the cause.
func (p *postProcessError) Unwrap() error {
	return p.err
}
142
// processor is one post-processing step. It receives the render context and
// an HTML node; processTextNodes invokes it on text nodes only.
type processor func(ctx *RenderContext, node *html.Node)

// defaultProcessors is the list of processors that PostProcess applies, in
// this order, to text nodes.
var defaultProcessors = []processor{
	fullIssuePatternProcessor,
	comparePatternProcessor,
	filePreviewPatternProcessor,
	fullHashPatternProcessor,
	shortLinkProcessor,
	linkProcessor,
	mentionProcessor,
	issueIndexPatternProcessor,
	commitCrossReferencePatternProcessor,
	hashCurrentPatternProcessor,
	emailAddressProcessor,
	emojiProcessor,
	emojiShortCodeProcessor,
}
160
161// PostProcess does the final required transformations to the passed raw HTML
162// data, and ensures its validity. Transformations include: replacing links and
163// emails with HTML links, parsing shortlinks in the format of [[Link]], like
164// MediaWiki, linking issues in the format #ID, and mentions in the format
165// @user, and others.
166func PostProcess(
167 ctx *RenderContext,
168 input io.Reader,
169 output io.Writer,
170) error {
171 return postProcess(ctx, defaultProcessors, input, output)
172}
173
// commitMessageProcessors is the base processor list used by
// RenderCommitMessage. Compared with defaultProcessors it omits
// filePreviewPatternProcessor and shortLinkProcessor.
var commitMessageProcessors = []processor{
	fullIssuePatternProcessor,
	comparePatternProcessor,
	fullHashPatternProcessor,
	linkProcessor,
	mentionProcessor,
	issueIndexPatternProcessor,
	commitCrossReferencePatternProcessor,
	hashCurrentPatternProcessor,
	emailAddressProcessor,
	emojiProcessor,
	emojiShortCodeProcessor,
}
187
188// RenderCommitMessage will use the same logic as PostProcess, but will disable
189// the shortLinkProcessor and will add a defaultLinkProcessor if defaultLink is
190// set, which changes every text node into a link to the passed default link.
191func RenderCommitMessage(
192 ctx *RenderContext,
193 content string,
194) (string, error) {
195 procs := commitMessageProcessors
196 if ctx.DefaultLink != "" {
197 // we don't have to fear data races, because being
198 // commitMessageProcessors of fixed len and cap, every time we append
199 // something to it the slice is realloc+copied, so append always
200 // generates the slice ex-novo.
201 procs = append(procs, genDefaultLinkProcessor(ctx.DefaultLink))
202 }
203 return renderProcessString(ctx, procs, content)
204}
205
// commitMessageSubjectProcessors is the base processor list used by
// RenderCommitMessageSubject. Unlike commitMessageProcessors it contains no
// emailAddressProcessor.
var commitMessageSubjectProcessors = []processor{
	fullIssuePatternProcessor,
	comparePatternProcessor,
	fullHashPatternProcessor,
	linkProcessor,
	mentionProcessor,
	issueIndexPatternProcessor,
	commitCrossReferencePatternProcessor,
	hashCurrentPatternProcessor,
	emojiShortCodeProcessor,
	emojiProcessor,
}
218
// emojiProcessors handles emoji only. It is used by RenderEmoji, and
// visitNode switches to it for text inside <a> elements.
var emojiProcessors = []processor{
	emojiShortCodeProcessor,
	emojiProcessor,
}
223
224// RenderCommitMessageSubject will use the same logic as PostProcess and
225// RenderCommitMessage, but will disable the shortLinkProcessor and
226// emailAddressProcessor, will add a defaultLinkProcessor if defaultLink is set,
227// which changes every text node into a link to the passed default link.
228func RenderCommitMessageSubject(
229 ctx *RenderContext,
230 content string,
231) (string, error) {
232 procs := commitMessageSubjectProcessors
233 if ctx.DefaultLink != "" {
234 // we don't have to fear data races, because being
235 // commitMessageSubjectProcessors of fixed len and cap, every time we
236 // append something to it the slice is realloc+copied, so append always
237 // generates the slice ex-novo.
238 procs = append(procs, genDefaultLinkProcessor(ctx.DefaultLink))
239 }
240 return renderProcessString(ctx, procs, content)
241}
242
243// RenderIssueTitle to process title on individual issue/pull page
244func RenderIssueTitle(
245 ctx *RenderContext,
246 title string,
247) (string, error) {
248 return renderProcessString(ctx, []processor{
249 inlineCodeBlockProcessor,
250 issueIndexPatternProcessor,
251 commitCrossReferencePatternProcessor,
252 hashCurrentPatternProcessor,
253 emojiShortCodeProcessor,
254 emojiProcessor,
255 }, title)
256}
257
258// RenderRefIssueTitle to process title on places where an issue is referenced
259func RenderRefIssueTitle(
260 ctx *RenderContext,
261 title string,
262) (string, error) {
263 return renderProcessString(ctx, []processor{
264 inlineCodeBlockProcessor,
265 issueIndexPatternProcessor,
266 emojiShortCodeProcessor,
267 emojiProcessor,
268 }, title)
269}
270
271func renderProcessString(ctx *RenderContext, procs []processor, content string) (string, error) {
272 var buf strings.Builder
273 if err := postProcess(ctx, procs, strings.NewReader(content), &buf); err != nil {
274 return "", err
275 }
276 return buf.String(), nil
277}
278
279// RenderDescriptionHTML will use similar logic as PostProcess, but will
280// use a single special linkProcessor.
281func RenderDescriptionHTML(
282 ctx *RenderContext,
283 content string,
284) (string, error) {
285 return renderProcessString(ctx, []processor{
286 descriptionLinkProcessor,
287 emojiShortCodeProcessor,
288 emojiProcessor,
289 }, content)
290}
291
292// RenderEmoji for when we want to just process emoji and shortcodes
293// in various places it isn't already run through the normal markdown processor
294func RenderEmoji(
295 ctx *RenderContext,
296 content string,
297) (string, error) {
298 return renderProcessString(ctx, emojiProcessors, content)
299}
300
var (
	// tagCleaner matches a "<" that is followed by one of: an optional "/" and
	// two word runs separated by "/", a "/"-delimited run of word characters
	// and spaces, or an opening/closing "html" or "head" tag name in any
	// letter case. postProcess rewrites these matches before parsing.
	tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL]\b)|(/?[hH][eE][aA][dD]\b))`)
	// nulCleaner removes NUL bytes.
	nulCleaner = strings.NewReplacer("\000", "")
)
305
306func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output io.Writer) error {
307 defer ctx.Cancel()
308 // FIXME: don't read all content to memory
309 rawHTML, err := io.ReadAll(input)
310 if err != nil {
311 return err
312 }
313
314 // parse the HTML
315 node, err := html.Parse(io.MultiReader(
316 // prepend "<html><body>"
317 strings.NewReader("<html><body>"),
318 // Strip out nuls - they're always invalid
319 bytes.NewReader(tagCleaner.ReplaceAll([]byte(nulCleaner.Replace(string(rawHTML))), []byte("<$1"))),
320 // close the tags
321 strings.NewReader("</body></html>"),
322 ))
323 if err != nil {
324 return &postProcessError{"invalid HTML", err}
325 }
326
327 if node.Type == html.DocumentNode {
328 node = node.FirstChild
329 }
330
331 visitNode(ctx, procs, node)
332
333 newNodes := make([]*html.Node, 0, 5)
334
335 if node.Data == "html" {
336 node = node.FirstChild
337 for node != nil && node.Data != "body" {
338 node = node.NextSibling
339 }
340 }
341 if node != nil {
342 if node.Data == "body" {
343 child := node.FirstChild
344 for child != nil {
345 newNodes = append(newNodes, child)
346 child = child.NextSibling
347 }
348 } else {
349 newNodes = append(newNodes, node)
350 }
351 }
352
353 // Render everything to buf.
354 for _, node := range newNodes {
355 if err := html.Render(output, node); err != nil {
356 return &postProcessError{"error rendering processed HTML", err}
357 }
358 }
359 return nil
360}
361
362func visitNode(ctx *RenderContext, procs []processor, node *html.Node) {
363 // Add user-content- to IDs and "#" links if they don't already have them
364 for idx, attr := range node.Attr {
365 val := strings.TrimPrefix(attr.Val, "#")
366 notHasPrefix := !strings.HasPrefix(val, "user-content-") && !blackfridayExtRegex.MatchString(val)
367
368 if attr.Key == "id" && notHasPrefix {
369 node.Attr[idx].Val = "user-content-" + attr.Val
370 }
371
372 if attr.Key == "href" && strings.HasPrefix(attr.Val, "#") && notHasPrefix {
373 node.Attr[idx].Val = "#user-content-" + val
374 }
375
376 if attr.Key == "class" && attr.Val == "emoji" {
377 procs = nil
378 }
379 }
380
381 // We ignore code and pre.
382 switch node.Type {
383 case html.TextNode:
384 processTextNodes(ctx, procs, node)
385 case html.ElementNode:
386 if node.Data == "img" {
387 for i, attr := range node.Attr {
388 if attr.Key != "src" {
389 continue
390 }
391 if len(attr.Val) > 0 && !IsLinkStr(attr.Val) && !strings.HasPrefix(attr.Val, "data:image/") {
392 attr.Val = util.URLJoin(ctx.Links.ResolveMediaLink(ctx.IsWiki), attr.Val)
393 }
394 attr.Val = camoHandleLink(attr.Val)
395 node.Attr[i] = attr
396 }
397 } else if node.Data == "a" {
398 // Restrict text in links to emojis
399 procs = emojiProcessors
400 } else if node.Data == "code" || node.Data == "pre" {
401 return
402 } else if node.Data == "i" {
403 for _, attr := range node.Attr {
404 if attr.Key != "class" {
405 continue
406 }
407 classes := strings.Split(attr.Val, " ")
408 for i, class := range classes {
409 if class == "icon" {
410 classes[0], classes[i] = classes[i], classes[0]
411 attr.Val = strings.Join(classes, " ")
412
413 // Remove all children of icons
414 child := node.FirstChild
415 for child != nil {
416 node.RemoveChild(child)
417 child = node.FirstChild
418 }
419 break
420 }
421 }
422 }
423 }
424 for n := node.FirstChild; n != nil; n = n.NextSibling {
425 visitNode(ctx, procs, n)
426 }
427 default:
428 }
429 // ignore everything else
430}
431
432// processTextNodes runs the passed node through various processors, in order to handle
433// all kinds of special links handled by the post-processing.
434func processTextNodes(ctx *RenderContext, procs []processor, node *html.Node) {
435 for _, p := range procs {
436 p(ctx, node)
437 }
438}
439
440// createKeyword() renders a highlighted version of an action keyword
441func createKeyword(content string) *html.Node {
442 span := &html.Node{
443 Type: html.ElementNode,
444 Data: atom.Span.String(),
445 Attr: []html.Attribute{},
446 }
447 span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: keywordClass})
448
449 text := &html.Node{
450 Type: html.TextNode,
451 Data: content,
452 }
453 span.AppendChild(text)
454
455 return span
456}
457
458func createInlineCode(content string) *html.Node {
459 code := &html.Node{
460 Type: html.ElementNode,
461 Data: atom.Code.String(),
462 Attr: []html.Attribute{},
463 }
464
465 code.Attr = append(code.Attr, html.Attribute{Key: "class", Val: "inline-code-block"})
466
467 text := &html.Node{
468 Type: html.TextNode,
469 Data: content,
470 }
471
472 code.AppendChild(text)
473 return code
474}
475
476func createEmoji(content, class, name, alias string) *html.Node {
477 span := &html.Node{
478 Type: html.ElementNode,
479 Data: atom.Span.String(),
480 Attr: []html.Attribute{},
481 }
482 if class != "" {
483 span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: class})
484 }
485 if name != "" {
486 span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: name})
487 }
488 if alias != "" {
489 span.Attr = append(span.Attr, html.Attribute{Key: "data-alias", Val: alias})
490 }
491
492 text := &html.Node{
493 Type: html.TextNode,
494 Data: content,
495 }
496
497 span.AppendChild(text)
498 return span
499}
500
501func createCustomEmoji(alias string) *html.Node {
502 span := &html.Node{
503 Type: html.ElementNode,
504 Data: atom.Span.String(),
505 Attr: []html.Attribute{},
506 }
507 span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: "emoji"})
508 span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: alias})
509 span.Attr = append(span.Attr, html.Attribute{Key: "data-alias", Val: alias})
510
511 img := &html.Node{
512 Type: html.ElementNode,
513 DataAtom: atom.Img,
514 Data: "img",
515 Attr: []html.Attribute{},
516 }
517 img.Attr = append(img.Attr, html.Attribute{Key: "alt", Val: ":" + alias + ":"})
518 img.Attr = append(img.Attr, html.Attribute{Key: "src", Val: setting.StaticURLPrefix + "/assets/img/emoji/" + alias + ".png"})
519
520 span.AppendChild(img)
521 return span
522}
523
524func createLink(href, content, class string) *html.Node {
525 a := &html.Node{
526 Type: html.ElementNode,
527 Data: atom.A.String(),
528 Attr: []html.Attribute{{Key: "href", Val: href}},
529 }
530
531 if class != "" {
532 a.Attr = append(a.Attr, html.Attribute{Key: "class", Val: class})
533 }
534
535 text := &html.Node{
536 Type: html.TextNode,
537 Data: content,
538 }
539
540 a.AppendChild(text)
541 return a
542}
543
544func createCodeLink(href, content, class string) *html.Node {
545 a := &html.Node{
546 Type: html.ElementNode,
547 Data: atom.A.String(),
548 Attr: []html.Attribute{{Key: "href", Val: href}},
549 }
550
551 if class != "" {
552 a.Attr = append(a.Attr, html.Attribute{Key: "class", Val: class})
553 }
554
555 text := &html.Node{
556 Type: html.TextNode,
557 Data: content,
558 }
559
560 code := &html.Node{
561 Type: html.ElementNode,
562 Data: atom.Code.String(),
563 Attr: []html.Attribute{{Key: "class", Val: "nohighlight"}},
564 }
565
566 code.AppendChild(text)
567 a.AppendChild(code)
568 return a
569}
570
571// replaceContent takes text node, and in its content it replaces a section of
572// it with the specified newNode.
573func replaceContent(node *html.Node, i, j int, newNode *html.Node) {
574 replaceContentList(node, i, j, []*html.Node{newNode})
575}
576
577// replaceContentList takes text node, and in its content it replaces a section of
578// it with the specified newNodes. An example to visualize how this can work can
579// be found here: https://play.golang.org/p/5zP8NnHZ03s
580func replaceContentList(node *html.Node, i, j int, newNodes []*html.Node) {
581 // get the data before and after the match
582 before := node.Data[:i]
583 after := node.Data[j:]
584
585 // Replace in the current node the text, so that it is only what it is
586 // supposed to have.
587 node.Data = before
588
589 // Get the current next sibling, before which we place the replaced data,
590 // and after that we place the new text node.
591 nextSibling := node.NextSibling
592 for _, n := range newNodes {
593 node.Parent.InsertBefore(n, nextSibling)
594 }
595 if after != "" {
596 node.Parent.InsertBefore(&html.Node{
597 Type: html.TextNode,
598 Data: after,
599 }, nextSibling)
600 }
601}
602
603func mentionProcessor(ctx *RenderContext, node *html.Node) {
604 start := 0
605 next := node.NextSibling
606 for node != nil && node != next && start < len(node.Data) {
607 // We replace only the first mention; other mentions will be addressed later
608 found, loc := references.FindFirstMentionBytes([]byte(node.Data[start:]))
609 if !found {
610 return
611 }
612 loc.Start += start
613 loc.End += start
614 mention := node.Data[loc.Start:loc.End]
615 var teams string
616 teams, ok := ctx.Metas["teams"]
617 // FIXME: util.URLJoin may not be necessary here:
618 // - setting.AppURL is defined to have a terminal '/' so unless mention[1:]
619 // is an AppSubURL link we can probably fallback to concatenation.
620 // team mention should follow @orgName/teamName style
621 if ok && strings.Contains(mention, "/") {
622 mentionOrgAndTeam := strings.Split(mention, "/")
623 if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") {
624 replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(ctx.Links.Prefix(), "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention"))
625 node = node.NextSibling.NextSibling
626 start = 0
627 continue
628 }
629 start = loc.End
630 continue
631 }
632 mentionedUsername := mention[1:]
633
634 if DefaultProcessorHelper.IsUsernameMentionable != nil && DefaultProcessorHelper.IsUsernameMentionable(ctx.Ctx, mentionedUsername) {
635 replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(ctx.Links.Prefix(), mentionedUsername), mention, "mention"))
636 node = node.NextSibling.NextSibling
637 start = 0
638 } else {
639 start = loc.End
640 }
641 }
642}
643
644func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
645 next := node.NextSibling
646 for node != nil && node != next {
647 m := shortLinkPattern.FindStringSubmatchIndex(node.Data)
648 if m == nil {
649 return
650 }
651
652 content := node.Data[m[2]:m[3]]
653 tail := node.Data[m[4]:m[5]]
654 props := make(map[string]string)
655
656 // MediaWiki uses [[link|text]], while GitHub uses [[text|link]]
657 // It makes page handling terrible, but we prefer GitHub syntax
658 // And fall back to MediaWiki only when it is obvious from the look
659 // Of text and link contents
660 sl := strings.Split(content, "|")
661 for _, v := range sl {
662 if equalPos := strings.IndexByte(v, '='); equalPos == -1 {
663 // There is no equal in this argument; this is a mandatory arg
664 if props["name"] == "" {
665 if IsLinkStr(v) {
666 // If we clearly see it is a link, we save it so
667
668 // But first we need to ensure, that if both mandatory args provided
669 // look like links, we stick to GitHub syntax
670 if props["link"] != "" {
671 props["name"] = props["link"]
672 }
673
674 props["link"] = strings.TrimSpace(v)
675 } else {
676 props["name"] = v
677 }
678 } else {
679 props["link"] = strings.TrimSpace(v)
680 }
681 } else {
682 // There is an equal; optional argument.
683
684 sep := strings.IndexByte(v, '=')
685 key, val := v[:sep], html.UnescapeString(v[sep+1:])
686
687 // When parsing HTML, x/net/html will change all quotes which are
688 // not used for syntax into UTF-8 quotes. So checking val[0] won't
689 // be enough, since that only checks a single byte.
690 if len(val) > 1 {
691 if (strings.HasPrefix(val, "“") && strings.HasSuffix(val, "”")) ||
692 (strings.HasPrefix(val, "‘") && strings.HasSuffix(val, "’")) {
693 const lenQuote = len("‘")
694 val = val[lenQuote : len(val)-lenQuote]
695 } else if (strings.HasPrefix(val, "\"") && strings.HasSuffix(val, "\"")) ||
696 (strings.HasPrefix(val, "'") && strings.HasSuffix(val, "'")) {
697 val = val[1 : len(val)-1]
698 } else if strings.HasPrefix(val, "'") && strings.HasSuffix(val, "’") {
699 const lenQuote = len("‘")
700 val = val[1 : len(val)-lenQuote]
701 }
702 }
703 props[key] = val
704 }
705 }
706
707 var name, link string
708 if props["link"] != "" {
709 link = props["link"]
710 } else if props["name"] != "" {
711 link = props["name"]
712 }
713 if props["title"] != "" {
714 name = props["title"]
715 } else if props["name"] != "" {
716 name = props["name"]
717 } else {
718 name = link
719 }
720
721 name += tail
722 image := false
723 switch ext := filepath.Ext(link); ext {
724 // fast path: empty string, ignore
725 case "":
726 // leave image as false
727 case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg":
728 image = true
729 }
730
731 childNode := &html.Node{}
732 linkNode := &html.Node{
733 FirstChild: childNode,
734 LastChild: childNode,
735 Type: html.ElementNode,
736 Data: "a",
737 DataAtom: atom.A,
738 }
739 childNode.Parent = linkNode
740 absoluteLink := IsLinkStr(link)
741 if !absoluteLink {
742 if image {
743 link = strings.ReplaceAll(link, " ", "+")
744 } else {
745 link = strings.ReplaceAll(link, " ", "-")
746 }
747 if !strings.Contains(link, "/") {
748 link = url.PathEscape(link)
749 }
750 }
751 if image {
752 if !absoluteLink {
753 link = util.URLJoin(ctx.Links.ResolveMediaLink(ctx.IsWiki), link)
754 }
755 title := props["title"]
756 if title == "" {
757 title = props["alt"]
758 }
759 if title == "" {
760 title = path.Base(name)
761 }
762 alt := props["alt"]
763
764 // make the childNode an image - if we can, we also place the alt
765 childNode.Type = html.ElementNode
766 childNode.Data = "img"
767 childNode.DataAtom = atom.Img
768 childNode.Attr = []html.Attribute{
769 {Key: "src", Val: link},
770 {Key: "title", Val: title},
771 {Key: "alt", Val: alt},
772 }
773 } else {
774 if !absoluteLink {
775 if ctx.IsWiki {
776 link = util.URLJoin(ctx.Links.WikiLink(), link)
777 } else {
778 link = util.URLJoin(ctx.Links.SrcLink(), link)
779 }
780 }
781 childNode.Type = html.TextNode
782 childNode.Data = name
783 }
784 linkNode.Attr = []html.Attribute{{Key: "href", Val: link}}
785 replaceContent(node, m[0], m[1], linkNode)
786 node = node.NextSibling.NextSibling
787 }
788}
789
790func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) {
791 if ctx.Metas == nil {
792 return
793 }
794 next := node.NextSibling
795 for node != nil && node != next {
796 re := getIssueFullPattern()
797 linkIndex, m := re.FindStringIndex(node.Data), re.FindStringSubmatch(node.Data)
798 if linkIndex == nil || m == nil {
799 return
800 }
801
802 link := node.Data[linkIndex[0]:linkIndex[1]]
803 text := "#" + m[re.SubexpIndex("num")] + m[re.SubexpIndex("subpath")]
804
805 if len(m[re.SubexpIndex("comment")]) > 0 {
806 if locale, ok := ctx.Ctx.Value(translation.ContextKey).(translation.Locale); ok {
807 text += " " + locale.TrString("repo.from_comment")
808 } else {
809 text += " (comment)"
810 }
811 }
812
813 matchUser := m[re.SubexpIndex("user")]
814 matchRepo := m[re.SubexpIndex("repo")]
815
816 if matchUser == ctx.Metas["user"] && matchRepo == ctx.Metas["repo"] {
817 replaceContent(node, linkIndex[0], linkIndex[1], createLink(link, text, "ref-issue"))
818 } else {
819 text = matchUser + "/" + matchRepo + text
820 replaceContent(node, linkIndex[0], linkIndex[1], createLink(link, text, "ref-issue"))
821 }
822 node = node.NextSibling.NextSibling
823 }
824}
825
826func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) {
827 if ctx.Metas == nil {
828 return
829 }
830
831 // FIXME: the use of "mode" is quite dirty and hacky, for example: what is a "document"? how should it be rendered?
832 // The "mode" approach should be refactored to some other more clear&reliable way.
833 crossLinkOnly := (ctx.Metas["mode"] == "document" && !ctx.IsWiki)
834
835 var (
836 found bool
837 ref *references.RenderizableReference
838 )
839
840 next := node.NextSibling
841
842 for node != nil && node != next {
843 _, hasExtTrackFormat := ctx.Metas["format"]
844
845 // Repos with external issue trackers might still need to reference local PRs
846 // We need to concern with the first one that shows up in the text, whichever it is
847 isNumericStyle := ctx.Metas["style"] == "" || ctx.Metas["style"] == IssueNameStyleNumeric
848 foundNumeric, refNumeric := references.FindRenderizableReferenceNumeric(node.Data, hasExtTrackFormat && !isNumericStyle, crossLinkOnly)
849
850 switch ctx.Metas["style"] {
851 case "", IssueNameStyleNumeric:
852 found, ref = foundNumeric, refNumeric
853 case IssueNameStyleAlphanumeric:
854 found, ref = references.FindRenderizableReferenceAlphanumeric(node.Data)
855 case IssueNameStyleRegexp:
856 pattern, err := regexplru.GetCompiled(ctx.Metas["regexp"])
857 if err != nil {
858 return
859 }
860 found, ref = references.FindRenderizableReferenceRegexp(node.Data, pattern)
861 }
862
863 // Repos with external issue trackers might still need to reference local PRs
864 // We need to concern with the first one that shows up in the text, whichever it is
865 if hasExtTrackFormat && !isNumericStyle && refNumeric != nil {
866 // If numeric (PR) was found, and it was BEFORE the non-numeric pattern, use that
867 // Allow a free-pass when non-numeric pattern wasn't found.
868 if found && (ref == nil || refNumeric.RefLocation.Start < ref.RefLocation.Start) {
869 found = foundNumeric
870 ref = refNumeric
871 }
872 }
873 if !found {
874 return
875 }
876
877 var link *html.Node
878 reftext := node.Data[ref.RefLocation.Start:ref.RefLocation.End]
879 if hasExtTrackFormat && !ref.IsPull && ref.Owner == "" {
880 ctx.Metas["index"] = ref.Issue
881
882 res, err := vars.Expand(ctx.Metas["format"], ctx.Metas)
883 if err != nil {
884 // here we could just log the error and continue the rendering
885 log.Error("unable to expand template vars for ref %s, err: %v", ref.Issue, err)
886 }
887
888 link = createLink(res, reftext, "ref-issue ref-external-issue")
889 } else {
890 // Path determines the type of link that will be rendered. It's unknown at this point whether
891 // the linked item is actually a PR or an issue. Luckily it's of no real consequence because
892 // Forgejo will redirect on click as appropriate.
893 path := "issues"
894 if ref.IsPull {
895 path = "pulls"
896 }
897 if ref.Owner == "" {
898 link = createLink(util.URLJoin(ctx.Links.Prefix(), ctx.Metas["user"], ctx.Metas["repo"], path, ref.Issue), reftext, "ref-issue")
899 } else {
900 link = createLink(util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, path, ref.Issue), reftext, "ref-issue")
901 }
902 }
903
904 if ref.Action == references.XRefActionNone {
905 replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link)
906 node = node.NextSibling.NextSibling
907 continue
908 }
909
910 // Decorate action keywords if actionable
911 var keyword *html.Node
912 if references.IsXrefActionable(ref, hasExtTrackFormat) {
913 keyword = createKeyword(node.Data[ref.ActionLocation.Start:ref.ActionLocation.End])
914 } else {
915 keyword = &html.Node{
916 Type: html.TextNode,
917 Data: node.Data[ref.ActionLocation.Start:ref.ActionLocation.End],
918 }
919 }
920 spaces := &html.Node{
921 Type: html.TextNode,
922 Data: node.Data[ref.ActionLocation.End:ref.RefLocation.Start],
923 }
924 replaceContentList(node, ref.ActionLocation.Start, ref.RefLocation.End, []*html.Node{keyword, spaces, link})
925 node = node.NextSibling.NextSibling.NextSibling.NextSibling
926 }
927}
928
929func commitCrossReferencePatternProcessor(ctx *RenderContext, node *html.Node) {
930 next := node.NextSibling
931
932 for node != nil && node != next {
933 found, ref := references.FindRenderizableCommitCrossReference(node.Data)
934 if !found {
935 return
936 }
937
938 reftext := ref.Owner + "/" + ref.Name + "@" + base.ShortSha(ref.CommitSha)
939 link := createLink(util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, "commit", ref.CommitSha), reftext, "commit")
940
941 replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link)
942 node = node.NextSibling.NextSibling
943 }
944}
945
946// fullHashPatternProcessor renders SHA containing URLs
947func fullHashPatternProcessor(ctx *RenderContext, node *html.Node) {
948 if ctx.Metas == nil {
949 return
950 }
951
952 next := node.NextSibling
953 for node != nil && node != next {
954 m := anyHashPattern.FindStringSubmatchIndex(node.Data)
955 if m == nil {
956 return
957 }
958
959 urlFull := node.Data[m[0]:m[1]]
960 text := base.ShortSha(node.Data[m[2]:m[3]])
961
962 // 3rd capture group matches a optional path
963 subpath := ""
964 if m[5] > 0 {
965 subpath = node.Data[m[4]:m[5]]
966 }
967
968 // 5th capture group matches a optional url hash
969 hash := ""
970 if m[9] > 0 {
971 hash = node.Data[m[8]:m[9]][1:]
972 }
973
974 start := m[0]
975 end := m[1]
976
977 // If url ends in '.', it's very likely that it is not part of the
978 // actual url but used to finish a sentence.
979 if strings.HasSuffix(urlFull, ".") {
980 end--
981 urlFull = urlFull[:len(urlFull)-1]
982 if hash != "" {
983 hash = hash[:len(hash)-1]
984 } else if subpath != "" {
985 subpath = subpath[:len(subpath)-1]
986 }
987 }
988
989 if subpath != "" {
990 text += subpath
991 }
992
993 if hash != "" {
994 text += " (" + hash + ")"
995 }
996 replaceContent(node, start, end, createCodeLink(urlFull, text, "commit"))
997 node = node.NextSibling.NextSibling
998 }
999}
1000
1001func comparePatternProcessor(ctx *RenderContext, node *html.Node) {
1002 if ctx.Metas == nil {
1003 return
1004 }
1005
1006 next := node.NextSibling
1007 for node != nil && node != next {
1008 m := comparePattern.FindStringSubmatchIndex(node.Data)
1009 if m == nil {
1010 return
1011 }
1012
1013 // Ensure that every group (m[0]...m[7]) has a match
1014 for i := 0; i < 8; i++ {
1015 if m[i] == -1 {
1016 return
1017 }
1018 }
1019
1020 urlFull := node.Data[m[0]:m[1]]
1021 text1 := base.ShortSha(node.Data[m[2]:m[3]])
1022 textDots := base.ShortSha(node.Data[m[4]:m[5]])
1023 text2 := base.ShortSha(node.Data[m[6]:m[7]])
1024
1025 hash := ""
1026 if m[9] > 0 {
1027 hash = node.Data[m[8]:m[9]][1:]
1028 }
1029
1030 start := m[0]
1031 end := m[1]
1032
1033 // If url ends in '.', it's very likely that it is not part of the
1034 // actual url but used to finish a sentence.
1035 if strings.HasSuffix(urlFull, ".") {
1036 end--
1037 urlFull = urlFull[:len(urlFull)-1]
1038 if hash != "" {
1039 hash = hash[:len(hash)-1]
1040 } else if text2 != "" {
1041 text2 = text2[:len(text2)-1]
1042 }
1043 }
1044
1045 text := text1 + textDots + text2
1046 if hash != "" {
1047 text += " (" + hash + ")"
1048 }
1049 replaceContent(node, start, end, createCodeLink(urlFull, text, "compare"))
1050 node = node.NextSibling.NextSibling
1051 }
1052}
1053
1054func filePreviewPatternProcessor(ctx *RenderContext, node *html.Node) {
1055 if ctx.Metas == nil || ctx.Metas["user"] == "" || ctx.Metas["repo"] == "" {
1056 return
1057 }
1058 if DefaultProcessorHelper.GetRepoFileBlob == nil {
1059 return
1060 }
1061
1062 locale := translation.NewLocale("en-US")
1063 if ctx.Ctx != nil {
1064 ctxLocale, ok := ctx.Ctx.Value(translation.ContextKey).(translation.Locale)
1065 if ok {
1066 locale = ctxLocale
1067 }
1068 }
1069
1070 next := node.NextSibling
1071 for node != nil && node != next {
1072 previews := NewFilePreviews(ctx, node, locale)
1073 if previews == nil {
1074 node = node.NextSibling
1075 continue
1076 }
1077
1078 offset := 0
1079 for _, preview := range previews {
1080 previewNode := preview.CreateHTML(locale)
1081
1082 // Specialized version of replaceContent, so the parent paragraph element is not destroyed from our div
1083 before := node.Data[:(preview.start - offset)]
1084 after := node.Data[(preview.end - offset):]
1085 afterPrefix := "<p>"
1086 offset = preview.end - len(afterPrefix)
1087 node.Data = before
1088 nextSibling := node.NextSibling
1089 node.Parent.InsertBefore(&html.Node{
1090 Type: html.RawNode,
1091 Data: "</p>",
1092 }, nextSibling)
1093 node.Parent.InsertBefore(previewNode, nextSibling)
1094 afterNode := &html.Node{
1095 Type: html.RawNode,
1096 Data: afterPrefix + after,
1097 }
1098 node.Parent.InsertBefore(afterNode, nextSibling)
1099 node = afterNode
1100 }
1101
1102 node = node.NextSibling
1103 }
1104}
1105
1106func inlineCodeBlockProcessor(ctx *RenderContext, node *html.Node) {
1107 start := 0
1108 next := node.NextSibling
1109 for node != nil && node != next && start < len(node.Data) {
1110 m := InlineCodeBlockRegex.FindStringSubmatchIndex(node.Data[start:])
1111 if m == nil {
1112 return
1113 }
1114
1115 code := node.Data[m[0]+1 : m[1]-1]
1116 replaceContent(node, m[0], m[1], createInlineCode(code))
1117 node = node.NextSibling.NextSibling
1118 }
1119}
1120
1121// emojiShortCodeProcessor for rendering text like :smile: into emoji
1122func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
1123 start := 0
1124 next := node.NextSibling
1125 for node != nil && node != next && start < len(node.Data) {
1126 m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:])
1127 if m == nil {
1128 return
1129 }
1130 m[0] += start
1131 m[1] += start
1132
1133 start = m[1]
1134
1135 alias := node.Data[m[0]:m[1]]
1136 alias = strings.ReplaceAll(alias, ":", "")
1137 converted := emoji.FromAlias(alias)
1138 if converted == nil {
1139 // check if this is a custom reaction
1140 if _, exist := setting.UI.CustomEmojisMap[alias]; exist {
1141 replaceContent(node, m[0], m[1], createCustomEmoji(alias))
1142 node = node.NextSibling.NextSibling
1143 start = 0
1144 continue
1145 }
1146 continue
1147 }
1148
1149 replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description, alias))
1150 node = node.NextSibling.NextSibling
1151 start = 0
1152 }
1153}
1154
1155// emoji processor to match emoji and add emoji class
1156func emojiProcessor(ctx *RenderContext, node *html.Node) {
1157 start := 0
1158 next := node.NextSibling
1159 for node != nil && node != next && start < len(node.Data) {
1160 m := emoji.FindEmojiSubmatchIndex(node.Data[start:])
1161 if m == nil {
1162 return
1163 }
1164 m[0] += start
1165 m[1] += start
1166
1167 codepoint := node.Data[m[0]:m[1]]
1168 start = m[1]
1169 val := emoji.FromCode(codepoint)
1170 if val != nil {
1171 replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description, val.Aliases[0]))
1172 node = node.NextSibling.NextSibling
1173 start = 0
1174 }
1175 }
1176}
1177
1178// hashCurrentPatternProcessor renders SHA1/SHA256 strings to corresponding links that
1179// are assumed to be in the same repository.
1180func hashCurrentPatternProcessor(ctx *RenderContext, node *html.Node) {
1181 if ctx.Metas == nil || ctx.Metas["user"] == "" || ctx.Metas["repo"] == "" || ctx.Metas["repoPath"] == "" {
1182 return
1183 }
1184
1185 start := 0
1186 next := node.NextSibling
1187 if ctx.ShaExistCache == nil {
1188 ctx.ShaExistCache = make(map[string]bool)
1189 }
1190 for node != nil && node != next && start < len(node.Data) {
1191 m := hashCurrentPattern.FindStringSubmatchIndex(node.Data[start:])
1192 if m == nil {
1193 return
1194 }
1195 m[2] += start
1196 m[3] += start
1197
1198 hash := node.Data[m[2]:m[3]]
1199 // The regex does not lie, it matches the hash pattern.
1200 // However, a regex cannot know if a hash actually exists or not.
1201 // We could assume that a SHA1 hash should probably contain alphas AND numerics
1202 // but that is not always the case.
1203 // Although unlikely, deadbeef and 1234567 are valid short forms of SHA1 hash
1204 // as used by git and github for linking and thus we have to do similar.
1205 // Because of this, we check to make sure that a matched hash is actually
1206 // a commit in the repository before making it a link.
1207
1208 // check cache first
1209 exist, inCache := ctx.ShaExistCache[hash]
1210 if !inCache {
1211 if ctx.GitRepo == nil {
1212 var err error
1213 ctx.GitRepo, err = git.OpenRepository(ctx.Ctx, ctx.Metas["repoPath"])
1214 if err != nil {
1215 log.Error("unable to open repository: %s Error: %v", ctx.Metas["repoPath"], err)
1216 return
1217 }
1218 ctx.AddCancel(func() {
1219 ctx.GitRepo.Close()
1220 ctx.GitRepo = nil
1221 })
1222 }
1223
1224 exist = ctx.GitRepo.IsReferenceExist(hash)
1225 ctx.ShaExistCache[hash] = exist
1226 }
1227
1228 if !exist {
1229 start = m[3]
1230 continue
1231 }
1232
1233 link := util.URLJoin(ctx.Links.Prefix(), ctx.Metas["user"], ctx.Metas["repo"], "commit", hash)
1234 replaceContent(node, m[2], m[3], createCodeLink(link, base.ShortSha(hash), "commit"))
1235 start = 0
1236 node = node.NextSibling.NextSibling
1237 }
1238}
1239
1240// emailAddressProcessor replaces raw email addresses with a mailto: link.
1241func emailAddressProcessor(ctx *RenderContext, node *html.Node) {
1242 next := node.NextSibling
1243 for node != nil && node != next {
1244 m := emailRegex.FindStringSubmatchIndex(node.Data)
1245 if m == nil {
1246 return
1247 }
1248
1249 mail := node.Data[m[2]:m[3]]
1250 replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto"))
1251 node = node.NextSibling.NextSibling
1252 }
1253}
1254
1255// linkProcessor creates links for any HTTP or HTTPS URL not captured by
1256// markdown.
1257func linkProcessor(ctx *RenderContext, node *html.Node) {
1258 next := node.NextSibling
1259 for node != nil && node != next {
1260 m := common.LinkRegex.FindStringIndex(node.Data)
1261 if m == nil {
1262 return
1263 }
1264
1265 uri := node.Data[m[0]:m[1]]
1266 replaceContent(node, m[0], m[1], createLink(uri, uri, "link"))
1267 node = node.NextSibling.NextSibling
1268 }
1269}
1270
1271func genDefaultLinkProcessor(defaultLink string) processor {
1272 return func(ctx *RenderContext, node *html.Node) {
1273 ch := &html.Node{
1274 Parent: node,
1275 Type: html.TextNode,
1276 Data: node.Data,
1277 }
1278
1279 node.Type = html.ElementNode
1280 node.Data = "a"
1281 node.DataAtom = atom.A
1282 node.Attr = []html.Attribute{
1283 {Key: "href", Val: defaultLink},
1284 {Key: "class", Val: "default-link muted"},
1285 }
1286 node.FirstChild, node.LastChild = ch, ch
1287 }
1288}
1289
1290// descriptionLinkProcessor creates links for DescriptionHTML
1291func descriptionLinkProcessor(ctx *RenderContext, node *html.Node) {
1292 next := node.NextSibling
1293 for node != nil && node != next {
1294 m := common.LinkRegex.FindStringIndex(node.Data)
1295 if m == nil {
1296 return
1297 }
1298
1299 uri := node.Data[m[0]:m[1]]
1300 replaceContent(node, m[0], m[1], createDescriptionLink(uri, uri))
1301 node = node.NextSibling.NextSibling
1302 }
1303}
1304
1305func createDescriptionLink(href, content string) *html.Node {
1306 textNode := &html.Node{
1307 Type: html.TextNode,
1308 Data: content,
1309 }
1310 linkNode := &html.Node{
1311 FirstChild: textNode,
1312 LastChild: textNode,
1313 Type: html.ElementNode,
1314 Data: "a",
1315 DataAtom: atom.A,
1316 Attr: []html.Attribute{
1317 {Key: "href", Val: href},
1318 {Key: "target", Val: "_blank"},
1319 {Key: "rel", Val: "noopener noreferrer"},
1320 },
1321 }
1322 textNode.Parent = linkNode
1323 return linkNode
1324}