A privacy-first, self-hosted, fully open-source personal knowledge management system, written in TypeScript and Go. (PERSONAL FORK)
at lambda-fork/main 328 lines 7.8 kB view raw
1// SiYuan - Refactor your thinking 2// Copyright (c) 2020-present, b3log.org 3// 4// This program is free software: you can redistribute it and/or modify 5// it under the terms of the GNU Affero General Public License as published by 6// the Free Software Foundation, either version 3 of the License, or 7// (at your option) any later version. 8// 9// This program is distributed in the hope that it will be useful, 10// but WITHOUT ANY WARRANTY; without even the implied warranty of 11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12// GNU Affero General Public License for more details. 13// 14// You should have received a copy of the GNU Affero General Public License 15// along with this program. If not, see <https://www.gnu.org/licenses/>. 16 17package util 18 19import ( 20 "bytes" 21 "fmt" 22 "math/rand" 23 "regexp" 24 "strconv" 25 "strings" 26 "time" 27 "unicode" 28 29 "github.com/88250/lute/html" 30 "github.com/siyuan-note/logging" 31) 32 33func init() { 34 rand.Seed(time.Now().UTC().UnixNano()) 35} 36 37func GetDuplicateName(master string) (ret string) { 38 if "" == master { 39 return 40 } 41 42 ret = master + " (1)" 43 r := regexp.MustCompile("^(.*) \\((\\d+)\\)$") 44 m := r.FindStringSubmatch(master) 45 if nil == m || 3 > len(m) { 46 return 47 } 48 49 num, _ := strconv.Atoi(m[2]) 50 num++ 51 ret = fmt.Sprintf("%s (%d)", m[1], num) 52 return 53} 54 55var ( 56 letter = []rune("abcdefghijklmnopqrstuvwxyz0123456789") 57) 58 59func RandString(length int) string { 60 b := make([]rune, length) 61 for i := range b { 62 b[i] = letter[rand.Intn(len(letter))] 63 } 64 return string(b) 65} 66 67// InsertElem inserts value at index into s. 68// 0 <= index <= len(s) 69func InsertElem[T any](s []T, index int, value T) []T { 70 if len(s) == index { // nil or empty slice or after last element 71 return append(s, value) 72 } 73 74 s = append(s[:index+1], s[index:]...) 
// index < len(s) 75 s[index] = value 76 return s 77} 78 79// RemoveElem removes the element at index i from s. 80func RemoveElem[T any](s []T, index int) []T { 81 return append(s[:index], s[index+1:]...) 82} 83 84func EscapeHTML(s string) (ret string) { 85 ret = s 86 if "" == strings.TrimSpace(ret) { 87 return 88 } 89 90 ret = html.EscapeString(ret) 91 return 92} 93 94func UnescapeHTML(s string) (ret string) { 95 ret = s 96 if "" == strings.TrimSpace(ret) { 97 return 98 } 99 100 ret = html.UnescapeString(ret) 101 return 102} 103 104func HasUnclosedHtmlTag(htmlStr string) bool { 105 // 检查未闭合注释 106 openIdx := 0 107 for { 108 start := strings.Index(htmlStr[openIdx:], "<!--") 109 if start == -1 { 110 break 111 } 112 start += openIdx 113 end := strings.Index(htmlStr[start+4:], "-->") 114 if end == -1 { 115 return true // 存在未闭合注释 116 } 117 openIdx = start + 4 + end + 3 118 } 119 120 // 去除所有注释内容 121 commentRe := regexp.MustCompile(`<!--[\s\S]*?-->`) 122 htmlStr = commentRe.ReplaceAllString(htmlStr, "") 123 124 tagRe := regexp.MustCompile(`<(/?)([a-zA-Z0-9]+)[^>]*?>`) 125 selfClosing := map[string]bool{ 126 "br": true, "img": true, "hr": true, "input": true, "meta": true, "link": true, 127 } 128 stack := []string{} 129 matches := tagRe.FindAllStringSubmatch(htmlStr, -1) 130 for _, m := range matches { 131 isClose := m[1] == "/" 132 tag := strings.ToLower(m[2]) 133 if selfClosing[tag] { 134 continue 135 } 136 if !isClose { 137 stack = append(stack, tag) 138 } else { 139 if len(stack) == 0 || stack[len(stack)-1] != tag { 140 return true // 闭合标签不匹配 141 } 142 stack = stack[:len(stack)-1] 143 } 144 } 145 return len(stack) != 0 146} 147 148func Reverse(s string) string { 149 runes := []rune(s) 150 for i, j := 0, len(runes)-1; i < j; i, j = i+1, j-1 { 151 runes[i], runes[j] = runes[j], runes[i] 152 } 153 return string(runes) 154} 155 156func RemoveRedundantSpace(str string) string { 157 buf := bytes.Buffer{} 158 lastIsChinese := false 159 lastIsSpace := false 160 for _, r := 
range str { 161 if unicode.IsSpace(r) { 162 if lastIsChinese || lastIsSpace { 163 continue 164 } 165 buf.WriteRune(' ') 166 lastIsChinese = false 167 lastIsSpace = true 168 continue 169 } 170 171 lastIsSpace = false 172 buf.WriteRune(r) 173 if unicode.Is(unicode.Han, r) { 174 lastIsChinese = true 175 continue 176 } else { 177 lastIsChinese = false 178 } 179 } 180 return buf.String() 181} 182 183func Convert2Float(s string) (float64, bool) { 184 s = RemoveInvalid(s) 185 s = strings.ReplaceAll(s, " ", "") 186 s = strings.ReplaceAll(s, ",", "") 187 buf := bytes.Buffer{} 188 for _, r := range s { 189 if unicode.IsDigit(r) || '.' == r || '-' == r { 190 buf.WriteRune(r) 191 } 192 } 193 s = buf.String() 194 ret, err := strconv.ParseFloat(strings.TrimSpace(s), 64) 195 if err != nil { 196 return 0, false 197 } 198 return ret, true 199} 200 201func ContainsSubStr(s string, subStrs []string) bool { 202 for _, v := range subStrs { 203 if strings.Contains(s, v) { 204 return true 205 } 206 } 207 return false 208} 209 210func GetContainsSubStrs(s string, subStrs []string) (ret []string) { 211 for _, v := range subStrs { 212 if strings.Contains(s, v) { 213 ret = append(ret, v) 214 } 215 } 216 return 217} 218 219func ReplaceStr(strs []string, old, new string) (ret []string, changed bool) { 220 if old == new { 221 return strs, false 222 } 223 224 for i, v := range strs { 225 if v == old { 226 strs[i] = new 227 changed = true 228 } 229 } 230 ret = strs 231 return 232} 233 234func SanitizeSVG(svgInput string) string { 235 // 1. 将字符串解析为节点树 236 doc, err := html.Parse(strings.NewReader(svgInput)) 237 if err != nil { 238 logging.LogWarnf("parse svg failed: %v", err) 239 return svgInput 240 } 241 242 // 2. 
定义递归移除逻辑 243 var walk func(*html.Node) 244 walk = func(n *html.Node) { 245 // 倒序遍历子节点,确保删除操作不影响后续迭代 246 for c := n.FirstChild; c != nil; { 247 next := c.NextSibling 248 if c.Type == html.ElementNode { 249 tag := strings.ToLower(c.Data) 250 if tag == "script" || tag == "iframe" || tag == "object" || tag == "embed" || tag == "foreignobject" { 251 n.RemoveChild(c) 252 c = next 253 continue 254 } 255 256 // 清理不安全属性 257 if len(c.Attr) > 0 { 258 // 过滤属性:删除以 on 开头的属性(事件处理),href/xlink:href 指向 javascript: 或不安全 data:,以及危险的 style 表达式 259 filtered := c.Attr[:0] 260 for _, a := range c.Attr { 261 key := strings.ToLower(a.Key) 262 val := strings.TrimSpace(strings.ToLower(a.Val)) 263 264 // 删除事件处理器属性(onload, onerror 等) 265 if strings.HasPrefix(key, "on") { 266 continue 267 } 268 269 // 删除 href 或 xlink:href 指向 javascript: 或某些不安全的 data: URI 270 if key == "href" || key == "xlink:href" || key == "xlinkhref" { 271 if strings.HasPrefix(val, "javascript:") { 272 continue 273 } 274 // 对 data: 做保守处理,删除包含可执行内容的 data:text/html 或 data:image/svg+xml 275 if strings.HasPrefix(val, "data:") { 276 if strings.Contains(val, "text/html") || strings.Contains(val, "image/svg+xml") || strings.Contains(val, "application/xhtml+xml") { 277 continue 278 } 279 } 280 } 281 282 // 清理 style 中的危险表达式,如 expression() 或 url(javascript:...) 283 if key == "style" { 284 low := val 285 if strings.Contains(low, "expression(") || strings.Contains(low, "url(javascript:") || strings.Contains(low, "javascript:") { 286 // 丢弃整个 style 属性以保证安全 287 continue 288 } 289 } 290 291 // 其它属性保留 292 filtered = append(filtered, a) 293 } 294 c.Attr = filtered 295 } 296 } 297 298 // 递归处理子节点(如果节点尚未被删除) 299 if c.Parent != nil { 300 walk(c) 301 } 302 303 c = next 304 } 305 } 306 307 // 3. 执行移除 308 walk(doc) 309 310 // 4. 将处理后的树重新渲染回字符串 311 var buf bytes.Buffer 312 if err = html.Render(&buf, doc); err != nil { 313 logging.LogWarnf("render svg failed: %v", err) 314 return svgInput 315 } 316 317 // 5. 
提取 SVG 部分 (html.Render 会自动加上 <html><body> 标签) 318 return extractSVG(buf.String()) 319} 320 321func extractSVG(fullHTML string) string { 322 start := strings.Index(fullHTML, "<svg") 323 end := strings.LastIndex(fullHTML, "</svg>") 324 if start == -1 || end == -1 { 325 return fullHTML 326 } 327 return fullHTML[start : end+6] 328}