forked from
tangled.org/core
Monorepo for Tangled
1// Package markup is an umbrella package for all markups and their renderers.
2package markup
3
4import (
5 "bytes"
6 "fmt"
7 "io"
8 "io/fs"
9 "net/url"
10 "path"
11 "strings"
12
13 chromahtml "github.com/alecthomas/chroma/v2/formatters/html"
14 "github.com/alecthomas/chroma/v2/styles"
15 "github.com/yuin/goldmark"
16 emoji "github.com/yuin/goldmark-emoji"
17 highlighting "github.com/yuin/goldmark-highlighting/v2"
18 "github.com/yuin/goldmark/ast"
19 "github.com/yuin/goldmark/extension"
20 "github.com/yuin/goldmark/parser"
21 "github.com/yuin/goldmark/renderer/html"
22 "github.com/yuin/goldmark/text"
23 "github.com/yuin/goldmark/util"
24 callout "gitlab.com/staticnoise/goldmark-callout"
25 "go.abhg.dev/goldmark/mermaid"
26 htmlparse "golang.org/x/net/html"
27
28 "tangled.org/core/api/tangled"
29 textension "tangled.org/core/appview/pages/markup/extension"
30 "tangled.org/core/appview/pages/repoinfo"
31)
32
// RendererType defines the type of renderer to use based on context.
// MarkdownTransformer.Transform switches on this value to decide which
// AST rewrites are applied.
type RendererType int

const (
	// RendererTypeRepoMarkdown is for repository documentation markdown files.
	// Being the first iota constant, it is also the zero value of RendererType.
	RendererTypeRepoMarkdown RendererType = iota
	// RendererTypeDefault is non-repo markdown, like issues/pulls/comments.
	RendererTypeDefault
)
42
// RenderContext holds the contextual data for rendering markdown.
// It can be initialized empty, and that'll skip any transformations.
type RenderContext struct {
	// CamoUrl is the URL of the camo image proxy. visitNode compares its host
	// against each image host to decide whether a src still needs rewriting.
	CamoUrl string
	// CamoSecret is not referenced in the code visible here.
	// NOTE(review): presumably the key used to sign camo URLs — confirm
	// against camoImageLinkTransformer.
	CamoSecret string
	// RepoInfo is embedded; the helpers in this file read FullName(), Ref,
	// OwnerDid, Name, Knot and CurrentDir through it.
	repoinfo.RepoInfo
	// IsDev makes generated knot URLs use the "http" scheme instead of "https".
	IsDev bool
	// Hostname is handed to NewMarkdown by RenderMarkdown.
	Hostname string
	// RendererType selects which AST transformations Transform applies.
	RendererType RendererType
	// Sanitizer backs SanitizeDefault and SanitizeDescription.
	Sanitizer Sanitizer
	// Files is not referenced in the code visible here.
	// NOTE(review): presumably a filesystem view of the repository — confirm
	// with the code that consumes it.
	Files fs.FS
}
55
56func NewMarkdown(hostname string, extra ...goldmark.Extender) goldmark.Markdown {
57 exts := []goldmark.Extender{
58 extension.GFM,
59 &mermaid.Extender{
60 RenderMode: mermaid.RenderModeClient,
61 NoScript: true,
62 },
63 highlighting.NewHighlighting(
64 highlighting.WithFormatOptions(
65 chromahtml.Standalone(false),
66 chromahtml.WithClasses(true),
67 ),
68 highlighting.WithCustomStyle(styles.Get("catppuccin-latte")),
69 ),
70 extension.NewFootnote(
71 extension.WithFootnoteIDPrefix([]byte("footnote")),
72 ),
73 callout.CalloutExtention,
74 textension.AtExt,
75 textension.NewTangledLinkExt(hostname),
76 emoji.Emoji,
77 }
78 exts = append(exts, extra...)
79 md := goldmark.New(
80 goldmark.WithExtensions(exts...),
81 goldmark.WithParserOptions(
82 parser.WithAutoHeadingID(),
83 ),
84 goldmark.WithRendererOptions(html.WithUnsafe()),
85 )
86 return md
87}
88
89// clone creates a shallow copy of the RenderContext
90func (rctx *RenderContext) Clone() *RenderContext {
91 if rctx == nil {
92 return nil
93 }
94 clone := *rctx
95 return &clone
96}
97
98// NewMarkdownWith is an alias for NewMarkdown with extra extensions.
99func NewMarkdownWith(hostname string, extra ...goldmark.Extender) goldmark.Markdown {
100 return NewMarkdown(hostname, extra...)
101}
102
103func (rctx *RenderContext) RenderMarkdown(source string) string {
104 return rctx.RenderMarkdownWith(source, NewMarkdown(rctx.Hostname))
105}
106
107func (rctx *RenderContext) RenderMarkdownWith(source string, md goldmark.Markdown) string {
108 if rctx != nil {
109 var transformers []util.PrioritizedValue
110
111 transformers = append(transformers, util.Prioritized(&MarkdownTransformer{rctx: rctx}, 10000))
112
113 md.Parser().AddOptions(
114 parser.WithASTTransformers(transformers...),
115 )
116 }
117
118 var buf bytes.Buffer
119 if err := md.Convert([]byte(source), &buf); err != nil {
120 return source
121 }
122
123 var processed strings.Builder
124 if err := postProcess(rctx, strings.NewReader(buf.String()), &processed); err != nil {
125 return source
126 }
127
128 return processed.String()
129}
130
131func postProcess(ctx *RenderContext, input io.Reader, output io.Writer) error {
132 node, err := htmlparse.Parse(io.MultiReader(
133 strings.NewReader("<html><body>"),
134 input,
135 strings.NewReader("</body></html>"),
136 ))
137 if err != nil {
138 return fmt.Errorf("failed to parse html: %w", err)
139 }
140
141 if node.Type == htmlparse.DocumentNode {
142 node = node.FirstChild
143 }
144
145 visitNode(ctx, node)
146
147 newNodes := make([]*htmlparse.Node, 0, 5)
148
149 if node.Data == "html" {
150 node = node.FirstChild
151 for node != nil && node.Data != "body" {
152 node = node.NextSibling
153 }
154 }
155 if node != nil {
156 if node.Data == "body" {
157 child := node.FirstChild
158 for child != nil {
159 newNodes = append(newNodes, child)
160 child = child.NextSibling
161 }
162 } else {
163 newNodes = append(newNodes, node)
164 }
165 }
166
167 for _, node := range newNodes {
168 if err := htmlparse.Render(output, node); err != nil {
169 return fmt.Errorf("failed to render processed html: %w", err)
170 }
171 }
172
173 return nil
174}
175
176func visitNode(ctx *RenderContext, node *htmlparse.Node) {
177 switch node.Type {
178 case htmlparse.ElementNode:
179 switch node.Data {
180 case "img", "source":
181 for i, attr := range node.Attr {
182 if attr.Key != "src" {
183 continue
184 }
185
186 camoUrl, _ := url.Parse(ctx.CamoUrl)
187 dstUrl, _ := url.Parse(attr.Val)
188 if dstUrl.Host != camoUrl.Host {
189 attr.Val = ctx.imageFromKnotTransformer(attr.Val)
190 attr.Val = ctx.camoImageLinkTransformer(attr.Val)
191 node.Attr[i] = attr
192 }
193 }
194 }
195
196 for n := node.FirstChild; n != nil; n = n.NextSibling {
197 visitNode(ctx, n)
198 }
199 default:
200 }
201}
202
203func (rctx *RenderContext) SanitizeDefault(html string) string {
204 return rctx.Sanitizer.SanitizeDefault(html)
205}
206
207func (rctx *RenderContext) SanitizeDescription(html string) string {
208 return rctx.Sanitizer.SanitizeDescription(html)
209}
210
// MarkdownTransformer is an AST transformer that rewrites headings, links and
// images of a parsed document according to the RenderContext it holds.
type MarkdownTransformer struct {
	// rctx supplies the renderer type and the rewrite helpers Transform calls.
	rctx *RenderContext
}
214
215func (a *MarkdownTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
216 _ = ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
217 if !entering {
218 return ast.WalkContinue, nil
219 }
220
221 switch a.rctx.RendererType {
222 case RendererTypeRepoMarkdown:
223 switch n := n.(type) {
224 case *ast.Heading:
225 a.rctx.anchorHeadingTransformer(n)
226 case *ast.Link:
227 a.rctx.relativeLinkTransformer(n)
228 case *ast.Image:
229 a.rctx.imageFromKnotAstTransformer(n)
230 a.rctx.camoImageLinkAstTransformer(n)
231 }
232 case RendererTypeDefault:
233 switch n := n.(type) {
234 case *ast.Heading:
235 a.rctx.anchorHeadingTransformer(n)
236 case *ast.Image:
237 a.rctx.imageFromKnotAstTransformer(n)
238 a.rctx.camoImageLinkAstTransformer(n)
239 }
240 }
241
242 return ast.WalkContinue, nil
243 })
244}
245
246func (rctx *RenderContext) relativeLinkTransformer(link *ast.Link) {
247
248 dst := string(link.Destination)
249
250 if isAbsoluteUrl(dst) || isFragment(dst) || isMail(dst) {
251 return
252 }
253
254 actualPath := rctx.actualPath(dst)
255
256 newPath := path.Join("/", rctx.RepoInfo.FullName(), "tree", rctx.RepoInfo.Ref, actualPath)
257 link.Destination = []byte(newPath)
258}
259
260func (rctx *RenderContext) imageFromKnotTransformer(dst string) string {
261 if isAbsoluteUrl(dst) {
262 return dst
263 }
264
265 scheme := "https"
266 if rctx.IsDev {
267 scheme = "http"
268 }
269
270 actualPath := rctx.actualPath(dst)
271
272 repoName := fmt.Sprintf("%s/%s", rctx.RepoInfo.OwnerDid, rctx.RepoInfo.Name)
273
274 query := fmt.Sprintf("repo=%s&ref=%s&path=%s&raw=true",
275 url.QueryEscape(repoName), url.QueryEscape(rctx.RepoInfo.Ref), actualPath)
276
277 parsedURL := &url.URL{
278 Scheme: scheme,
279 Host: rctx.Knot,
280 Path: path.Join("/xrpc", tangled.RepoBlobNSID),
281 RawQuery: query,
282 }
283 newPath := parsedURL.String()
284 return newPath
285}
286
287func (rctx *RenderContext) imageFromKnotAstTransformer(img *ast.Image) {
288 dst := string(img.Destination)
289 img.Destination = []byte(rctx.imageFromKnotTransformer(dst))
290}
291
292func (rctx *RenderContext) anchorHeadingTransformer(h *ast.Heading) {
293 idGeneric, exists := h.AttributeString("id")
294 if !exists {
295 return // no id, nothing to do
296 }
297 id, ok := idGeneric.([]byte)
298 if !ok {
299 return
300 }
301
302 // create anchor link
303 anchor := ast.NewLink()
304 anchor.Destination = fmt.Appendf(nil, "#%s", string(id))
305 anchor.SetAttribute([]byte("class"), []byte("anchor"))
306
307 // create icon text
308 iconText := ast.NewString([]byte("#"))
309 anchor.AppendChild(anchor, iconText)
310
311 // set class on heading
312 h.SetAttribute([]byte("class"), []byte("heading"))
313
314 // append anchor to heading
315 h.AppendChild(h, anchor)
316}
317
318// actualPath decides when to join the file path with the
319// current repository directory (essentially only when the link
320// destination is relative. if it's absolute then we assume the
321// user knows what they're doing.)
322func (rctx *RenderContext) actualPath(dst string) string {
323 if path.IsAbs(dst) {
324 return dst
325 }
326
327 return path.Join(rctx.CurrentDir, dst)
328}
329
// isAbsoluteUrl reports whether link parses as a URL that carries a scheme.
// Unparseable input is reported as not absolute.
func isAbsoluteUrl(link string) bool {
	u, err := url.Parse(link)
	return err == nil && u.IsAbs()
}
337
// isFragment reports whether link is a fragment-only reference, i.e. it
// begins with '#'.
func isFragment(link string) bool {
	return strings.Index(link, "#") == 0
}
341
// isMail reports whether link starts with the "mailto:" scheme prefix
// (case-sensitive).
func isMail(link string) bool {
	const scheme = "mailto:"
	return strings.HasPrefix(link, scheme)
}