Monorepo for Tangled tangled.org

appview/pages: rework sanitizer

- initialize sanitizer once, and reuse for life
- improve policies, and allow sanitizer to hold multiple policies
(this will come in handy for PR titles, repo descriptions, profile
descriptions, etc.)
- add generally safe items to the allow list; most of these are generated by
goldmark GFM

Signed-off-by: oppiliappan <me@oppi.li>

oppi.li 47746c7a 581ad086

verified
Changed files
+64 -13
appview
pages
+55 -6
appview/pages/markup/markdown.go
··· 7 7 "io" 8 8 "net/url" 9 9 "path" 10 + "regexp" 10 11 "strings" 11 12 12 13 "github.com/microcosm-cc/bluemonday" ··· 40 41 repoinfo.RepoInfo 41 42 IsDev bool 42 43 RendererType RendererType 44 + Sanitizer Sanitizer 45 + } 46 + 47 + type Sanitizer struct { 48 + defaultPolicy *bluemonday.Policy 43 49 } 44 50 45 51 func (rctx *RenderContext) RenderMarkdown(source string) string { ··· 145 151 } 146 152 } 147 153 148 - func (rctx *RenderContext) Sanitize(html string) string { 154 + func (rctx *RenderContext) SanitizeDefault(html string) string { 155 + return rctx.Sanitizer.defaultPolicy.Sanitize(html) 156 + } 157 + 158 + func NewSanitizer() Sanitizer { 159 + return Sanitizer{ 160 + defaultPolicy: defaultPolicy(), 161 + } 162 + } 163 + func defaultPolicy() *bluemonday.Policy { 149 164 policy := bluemonday.UGCPolicy() 150 165 166 + // Allow generally safe attributes 167 + generalSafeAttrs := []string{ 168 + "abbr", "accept", "accept-charset", 169 + "accesskey", "action", "align", "alt", 170 + "aria-describedby", "aria-hidden", "aria-label", "aria-labelledby", 171 + "axis", "border", "cellpadding", "cellspacing", "char", 172 + "charoff", "charset", "checked", 173 + "clear", "cols", "colspan", "color", 174 + "compact", "coords", "datetime", "dir", 175 + "disabled", "enctype", "for", "frame", 176 + "headers", "height", "hreflang", 177 + "hspace", "ismap", "label", "lang", 178 + "maxlength", "media", "method", 179 + "multiple", "name", "nohref", "noshade", 180 + "nowrap", "open", "prompt", "readonly", "rel", "rev", 181 + "rows", "rowspan", "rules", "scope", 182 + "selected", "shape", "size", "span", 183 + "start", "summary", "tabindex", "target", 184 + "title", "type", "usemap", "valign", "value", 185 + "vspace", "width", "itemprop", 186 + } 187 + 188 + generalSafeElements := []string{ 189 + "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt", 190 + "div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", 
"thead", "tbody", "tfoot", "blockquote", "label", 191 + "dl", "dt", "dd", "kbd", "q", "samp", "var", "hr", "ruby", "rt", "rp", "li", "tr", "td", "th", "s", "strike", "summary", 192 + "details", "caption", "figure", "figcaption", 193 + "abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "video", "wbr", 194 + } 195 + 196 + policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...) 197 + 151 198 // video 152 - policy.AllowElements("video") 153 - policy.AllowAttrs("controls").OnElements("video") 154 - policy.AllowElements("source") 155 - policy.AllowAttrs("src", "type").OnElements("source") 199 + policy.AllowAttrs("src", "autoplay", "controls").OnElements("video") 200 + 201 + // checkboxes 202 + policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") 203 + policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input") 156 204 157 205 // centering content 158 206 policy.AllowElements("center") ··· 173 221 "margin-top", 174 222 "margin-bottom", 175 223 ) 176 - return policy.Sanitize(html) 224 + 225 + return policy 177 226 } 178 227 179 228 type MarkdownTransformer struct {
+9 -7
appview/pages/pages.go
··· 57 57 IsDev: config.Core.Dev, 58 58 CamoUrl: config.Camo.Host, 59 59 CamoSecret: config.Camo.SharedSecret, 60 + Sanitizer: markup.NewSanitizer(), 60 61 } 61 62 62 63 p := &Pages{ ··· 522 523 p.rctx.RendererType = markup.RendererTypeRepoMarkdown 523 524 524 525 if params.ReadmeFileName != "" { 525 - var htmlString string 526 526 ext := filepath.Ext(params.ReadmeFileName) 527 527 switch ext { 528 528 case ".md", ".markdown", ".mdown", ".mkdn", ".mkd": 529 - htmlString = p.rctx.Sanitize(htmlString) 530 - htmlString = p.rctx.RenderMarkdown(params.Readme) 531 529 params.Raw = false 532 - params.HTMLReadme = template.HTML(htmlString) 530 + htmlString := p.rctx.RenderMarkdown(params.Readme) 531 + sanitized := p.rctx.SanitizeDefault(htmlString) 532 + params.HTMLReadme = template.HTML(sanitized) 533 533 default: 534 534 params.Raw = true 535 535 } ··· 668 668 p.rctx.RepoInfo = params.RepoInfo 669 669 p.rctx.RendererType = markup.RendererTypeRepoMarkdown 670 670 htmlString := p.rctx.RenderMarkdown(params.Contents) 671 - params.RenderedContents = template.HTML(p.rctx.Sanitize(htmlString)) 671 + sanitized := p.rctx.SanitizeDefault(htmlString) 672 + params.RenderedContents = template.HTML(sanitized) 672 673 } 673 674 } 674 675 ··· 1182 1183 if params.ShowRendered { 1183 1184 switch markup.GetFormat(params.String.Filename) { 1184 1185 case markup.FormatMarkdown: 1185 - p.rctx.RendererType = markup.RendererTypeDefault 1186 + p.rctx.RendererType = markup.RendererTypeRepoMarkdown 1186 1187 htmlString := p.rctx.RenderMarkdown(params.String.Contents) 1187 - params.RenderedContents = template.HTML(p.rctx.Sanitize(htmlString)) 1188 + sanitized := p.rctx.SanitizeDefault(htmlString) 1189 + params.RenderedContents = template.HTML(sanitized) 1188 1190 } 1189 1191 } 1190 1192