forked from tangled.org/core
Monorepo for Tangled

appview: markdown: introduce post-processor step to transform image links

This adds a post-processor step to the markdown renderer that parses raw
HTML `<img>` elements and resolves their `src` attributes if needed.

authored by brookjeynes.dev and committed by Tangled 6dbaf7f9 9436c543

Changed files
+99 -15
appview
pages
+10 -5
appview/pages/markup/camo.go
··· 17 17 return fmt.Sprintf("%s/%s/%s", baseURL, signature, hexURL) 18 18 } 19 19 20 - func (rctx *RenderContext) camoImageLinkTransformer(img *ast.Image) { 20 + func (rctx *RenderContext) camoImageLinkTransformer(dst string) string { 21 21 // don't camo on dev 22 22 if rctx.IsDev { 23 - return 23 + return dst 24 24 } 25 25 26 - dst := string(img.Destination) 27 - 28 26 if rctx.CamoUrl != "" && rctx.CamoSecret != "" { 29 - img.Destination = []byte(generateCamoURL(rctx.CamoUrl, rctx.CamoSecret, dst)) 27 + return generateCamoURL(rctx.CamoUrl, rctx.CamoSecret, dst) 30 28 } 29 + 30 + return dst 31 + } 32 + 33 + func (rctx *RenderContext) camoImageLinkAstTransformer(img *ast.Image) { 34 + dst := string(img.Destination) 35 + img.Destination = []byte(rctx.camoImageLinkTransformer(dst)) 31 36 }
+89 -10
appview/pages/markup/markdown.go
··· 3 3 4 4 import ( 5 5 "bytes" 6 + "fmt" 7 + "io" 6 8 "net/url" 7 9 "path" 10 + "strings" 8 11 9 12 "github.com/microcosm-cc/bluemonday" 10 13 "github.com/yuin/goldmark" ··· 14 17 "github.com/yuin/goldmark/renderer/html" 15 18 "github.com/yuin/goldmark/text" 16 19 "github.com/yuin/goldmark/util" 20 + htmlparse "golang.org/x/net/html" 17 21 18 22 "tangled.sh/tangled.sh/core/appview/pages/repoinfo" 19 23 ) ··· 61 65 if err := md.Convert([]byte(source), &buf); err != nil { 62 66 return source 63 67 } 64 - return buf.String() 68 + 69 + var processed strings.Builder 70 + if err := postProcess(rctx, strings.NewReader(buf.String()), &processed); err != nil { 71 + return source 72 + } 73 + 74 + return processed.String() 75 + } 76 + 77 + func postProcess(ctx *RenderContext, input io.Reader, output io.Writer) error { 78 + node, err := htmlparse.Parse(io.MultiReader( 79 + strings.NewReader("<html><body>"), 80 + input, 81 + strings.NewReader("</body></html>"), 82 + )) 83 + if err != nil { 84 + return fmt.Errorf("failed to parse html: %w", err) 85 + } 86 + 87 + if node.Type == htmlparse.DocumentNode { 88 + node = node.FirstChild 89 + } 90 + 91 + visitNode(ctx, node) 92 + 93 + newNodes := make([]*htmlparse.Node, 0, 5) 94 + 95 + if node.Data == "html" { 96 + node = node.FirstChild 97 + for node != nil && node.Data != "body" { 98 + node = node.NextSibling 99 + } 100 + } 101 + if node != nil { 102 + if node.Data == "body" { 103 + child := node.FirstChild 104 + for child != nil { 105 + newNodes = append(newNodes, child) 106 + child = child.NextSibling 107 + } 108 + } else { 109 + newNodes = append(newNodes, node) 110 + } 111 + } 112 + 113 + for _, node := range newNodes { 114 + if err := htmlparse.Render(output, node); err != nil { 115 + return fmt.Errorf("failed to render processed html: %w", err) 116 + } 117 + } 118 + 119 + return nil 120 + } 121 + 122 + func visitNode(ctx *RenderContext, node *htmlparse.Node) { 123 + switch node.Type { 124 + case htmlparse.ElementNode: 125 + if 
node.Data == "img" { 126 + for i, attr := range node.Attr { 127 + if attr.Key != "src" { 128 + continue 129 + } 130 + attr.Val = ctx.imageFromKnotTransformer(attr.Val) 131 + attr.Val = ctx.camoImageLinkTransformer(attr.Val) 132 + node.Attr[i] = attr 133 + } 134 + } 135 + 136 + for n := node.FirstChild; n != nil; n = n.NextSibling { 137 + visitNode(ctx, n) 138 + } 139 + default: 140 + } 65 141 } 66 142 67 143 func (rctx *RenderContext) Sanitize(html string) string { ··· 101 177 case *ast.Link: 102 178 a.rctx.relativeLinkTransformer(n) 103 179 case *ast.Image: 104 - a.rctx.imageFromKnotTransformer(n) 105 - a.rctx.camoImageLinkTransformer(n) 180 + a.rctx.imageFromKnotAstTransformer(n) 181 + a.rctx.camoImageLinkAstTransformer(n) 106 182 } 107 183 case RendererTypeDefault: 108 184 switch n := n.(type) { 109 185 case *ast.Image: 110 - a.rctx.imageFromKnotTransformer(n) 111 - a.rctx.camoImageLinkTransformer(n) 186 + a.rctx.imageFromKnotAstTransformer(n) 187 + a.rctx.camoImageLinkAstTransformer(n) 112 188 } 113 189 } 114 190 ··· 130 206 link.Destination = []byte(newPath) 131 207 } 132 208 133 - func (rctx *RenderContext) imageFromKnotTransformer(img *ast.Image) { 134 - dst := string(img.Destination) 135 - 209 + func (rctx *RenderContext) imageFromKnotTransformer(dst string) string { 136 210 if isAbsoluteUrl(dst) { 137 - return 211 + return dst 138 212 } 139 213 140 214 scheme := "https" ··· 155 229 actualPath), 156 230 } 157 231 newPath := parsedURL.String() 158 - img.Destination = []byte(newPath) 232 + return newPath 233 + } 234 + 235 + func (rctx *RenderContext) imageFromKnotAstTransformer(img *ast.Image) { 236 + dst := string(img.Destination) 237 + img.Destination = []byte(rctx.imageFromKnotTransformer(dst)) 159 238 } 160 239 161 240 // actualPath decides when to join the file path with the