A privacy-first, self-hosted, fully open-source personal knowledge management application, written in TypeScript and Go. (PERSONAL FORK)
1// SiYuan - Refactor your thinking
2// Copyright (c) 2020-present, b3log.org
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Affero General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU Affero General Public License for more details.
13//
14// You should have received a copy of the GNU Affero General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17package util
18
19import (
20 "bytes"
21 "fmt"
22 "math/rand"
23 "regexp"
24 "strconv"
25 "strings"
26 "time"
27 "unicode"
28
29 "github.com/88250/lute/html"
30 "github.com/siyuan-note/logging"
31)
32
// init seeds the shared math/rand source with the current UTC time, in
// nanoseconds, when the package is loaded.
//
// NOTE(review): rand.Seed is deprecated as of Go 1.20; the call is kept so
// that builds with older toolchains still get a time-based seed.
func init() {
	seed := time.Now().UTC().UnixNano()
	rand.Seed(seed)
}
36
// duplicateNameRe matches a name that already ends in a " (N)" counter.
// Group 1 is the base name, group 2 the decimal counter.
var duplicateNameRe = regexp.MustCompile("^(.*) \\((\\d+)\\)$")

// GetDuplicateName returns the name to give a duplicate of master.
//
//   - An empty master yields an empty result.
//   - If master ends in " (N)", the counter is incremented: "doc (1)" becomes
//     "doc (2)".
//   - Otherwise " (1)" is appended: "doc" becomes "doc (1)".
//
// A counter that does not fit in an int, or that cannot be incremented
// without overflowing, is treated as part of the name, so " (1)" is appended
// instead of producing a wrapped-around negative number.
func GetDuplicateName(master string) (ret string) {
	if "" == master {
		return
	}

	ret = master + " (1)"
	m := duplicateNameRe.FindStringSubmatch(master)
	if 3 > len(m) {
		return
	}

	const maxInt = int(^uint(0) >> 1)
	num, err := strconv.Atoi(m[2])
	if err != nil || num == maxInt {
		// Atoi saturates at maxInt on range errors; incrementing would wrap.
		return
	}
	return fmt.Sprintf("%s (%d)", m[1], num+1)
}
54
var (
	// letter is the alphabet RandString draws from: the lowercase ASCII
	// letters followed by the decimal digits.
	letter = []rune("abcdefghijklmnopqrstuvwxyz0123456789")
)

// RandString returns a string of length characters, each picked from letter
// using the package-level math/rand source. It is not suitable for secrets.
//
// A length of zero or less yields the empty string (a negative length used
// to panic while allocating the buffer).
func RandString(length int) string {
	if length <= 0 {
		return ""
	}

	b := make([]rune, length)
	for i := range b {
		b[i] = letter[rand.Intn(len(letter))]
	}
	return string(b)
}
66
// InsertElem returns s with value placed at position index; every element
// that was at index or later moves one slot toward the end.
//
// index must satisfy 0 <= index <= len(s), otherwise the call panics. When s
// has spare capacity the shift is done inside s's backing array.
func InsertElem[T any](s []T, index int, value T) []T {
	var zero T
	s = append(s, zero)          // make room for one more element (works for nil s too)
	copy(s[index+1:], s[index:]) // shift the tail right by one
	s[index] = value
	return s
}
78
// RemoveElem returns s without the element at index. The remaining elements
// are shifted left inside s's backing array, so the caller's slice contents
// change as well. index must satisfy 0 <= index < len(s), otherwise the call
// panics.
func RemoveElem[T any](s []T, index int) []T {
	copy(s[index:], s[index+1:])
	return s[:len(s)-1]
}
83
// EscapeHTML returns s escaped by html.EscapeString. Strings that are empty
// or consist only of whitespace are returned untouched without calling the
// escaper.
func EscapeHTML(s string) (ret string) {
	if strings.TrimSpace(s) == "" {
		return s
	}
	return html.EscapeString(s)
}
93
// UnescapeHTML returns s with HTML entities decoded by html.UnescapeString.
// Strings that are empty or consist only of whitespace are returned untouched
// without calling the decoder.
func UnescapeHTML(s string) (ret string) {
	if strings.TrimSpace(s) == "" {
		return s
	}
	return html.UnescapeString(s)
}
103
var (
	// unclosedCheckTagRe matches one opening or closing tag. Group 1 is "/"
	// for a closing tag and empty otherwise; group 2 is the tag name.
	unclosedCheckTagRe = regexp.MustCompile(`<(/?)([a-zA-Z0-9]+)[^>]*?>`)

	// htmlVoidElements holds the HTML void elements: they never have an end
	// tag, so they must not take part in open/close matching.
	htmlVoidElements = map[string]struct{}{
		"area": {}, "base": {}, "br": {}, "col": {}, "embed": {}, "hr": {},
		"img": {}, "input": {}, "link": {}, "meta": {}, "source": {},
		"track": {}, "wbr": {},
	}
)

// HasUnclosedHtmlTag reports whether htmlStr contains markup that is not
// properly closed. It returns true when
//
//   - a "<!--" comment has no terminating "-->",
//   - a closing tag does not match the most recently opened tag, or
//   - at least one opened tag is never closed.
//
// Comments are removed before tags are examined, and tag names are compared
// case-insensitively. Void elements (br, img, wbr, source, ...) are ignored.
// A non-void tag written with a trailing "/>" is still treated as an opening
// tag.
func HasUnclosedHtmlTag(htmlStr string) bool {
	// Single pass over the comments: bail out on an unterminated one and
	// collect everything outside of comments for the tag scan below.
	var stripped strings.Builder
	rest := htmlStr
	for {
		before, after, found := strings.Cut(rest, "<!--")
		stripped.WriteString(before)
		if !found {
			break
		}
		if _, rest, found = strings.Cut(after, "-->"); !found {
			return true // unterminated comment
		}
	}

	var stack []string
	for _, m := range unclosedCheckTagRe.FindAllStringSubmatch(stripped.String(), -1) {
		tag := strings.ToLower(m[2])
		if _, void := htmlVoidElements[tag]; void {
			continue
		}
		if m[1] != "/" {
			stack = append(stack, tag)
			continue
		}
		if len(stack) == 0 || stack[len(stack)-1] != tag {
			return true // closing tag does not match the innermost open tag
		}
		stack = stack[:len(stack)-1]
	}
	return len(stack) != 0
}
147
// Reverse returns s with its runes in the opposite order. The reversal works
// on runes rather than bytes, so multi-byte characters stay intact.
func Reverse(s string) string {
	src := []rune(s)
	n := len(src)
	out := make([]rune, n)
	for i, r := range src {
		out[n-1-i] = r
	}
	return string(out)
}
155
156func RemoveRedundantSpace(str string) string {
157 buf := bytes.Buffer{}
158 lastIsChinese := false
159 lastIsSpace := false
160 for _, r := range str {
161 if unicode.IsSpace(r) {
162 if lastIsChinese || lastIsSpace {
163 continue
164 }
165 buf.WriteRune(' ')
166 lastIsChinese = false
167 lastIsSpace = true
168 continue
169 }
170
171 lastIsSpace = false
172 buf.WriteRune(r)
173 if unicode.Is(unicode.Han, r) {
174 lastIsChinese = true
175 continue
176 } else {
177 lastIsChinese = false
178 }
179 }
180 return buf.String()
181}
182
183func Convert2Float(s string) (float64, bool) {
184 s = RemoveInvalid(s)
185 s = strings.ReplaceAll(s, " ", "")
186 s = strings.ReplaceAll(s, ",", "")
187 buf := bytes.Buffer{}
188 for _, r := range s {
189 if unicode.IsDigit(r) || '.' == r || '-' == r {
190 buf.WriteRune(r)
191 }
192 }
193 s = buf.String()
194 ret, err := strconv.ParseFloat(strings.TrimSpace(s), 64)
195 if err != nil {
196 return 0, false
197 }
198 return ret, true
199}
200
// ContainsSubStr reports whether s contains at least one of subStrs. It
// returns false when subStrs is empty.
func ContainsSubStr(s string, subStrs []string) bool {
	for i := 0; i < len(subStrs); i++ {
		if strings.Contains(s, subStrs[i]) {
			return true
		}
	}
	return false
}
209
// GetContainsSubStrs returns, in their original order, the entries of subStrs
// that occur in s. The result is nil when none of them occurs.
func GetContainsSubStrs(s string, subStrs []string) (ret []string) {
	for i := range subStrs {
		candidate := subStrs[i]
		if !strings.Contains(s, candidate) {
			continue
		}
		ret = append(ret, candidate)
	}
	return ret
}
218
// ReplaceStr overwrites every element of strs equal to old with new. The
// replacement happens in place: ret is the same slice as strs. changed
// reports whether at least one element was replaced; when old == new nothing
// is touched and changed is false.
func ReplaceStr(strs []string, old, new string) (ret []string, changed bool) {
	ret = strs
	if old == new {
		return ret, false
	}

	for i := range ret {
		if ret[i] != old {
			continue
		}
		ret[i] = new
		changed = true
	}
	return ret, changed
}
233
234func SanitizeSVG(svgInput string) string {
235 // 1. 将字符串解析为节点树
236 doc, err := html.Parse(strings.NewReader(svgInput))
237 if err != nil {
238 logging.LogWarnf("parse svg failed: %v", err)
239 return svgInput
240 }
241
242 // 2. 定义递归移除逻辑
243 var walk func(*html.Node)
244 walk = func(n *html.Node) {
245 // 倒序遍历子节点,确保删除操作不影响后续迭代
246 for c := n.FirstChild; c != nil; {
247 next := c.NextSibling
248 if c.Type == html.ElementNode {
249 tag := strings.ToLower(c.Data)
250 if tag == "script" || tag == "iframe" || tag == "object" || tag == "embed" || tag == "foreignobject" {
251 n.RemoveChild(c)
252 c = next
253 continue
254 }
255
256 // 清理不安全属性
257 if len(c.Attr) > 0 {
258 // 过滤属性:删除以 on 开头的属性(事件处理),href/xlink:href 指向 javascript: 或不安全 data:,以及危险的 style 表达式
259 filtered := c.Attr[:0]
260 for _, a := range c.Attr {
261 key := strings.ToLower(a.Key)
262 val := strings.TrimSpace(strings.ToLower(a.Val))
263
264 // 删除事件处理器属性(onload, onerror 等)
265 if strings.HasPrefix(key, "on") {
266 continue
267 }
268
269 // 删除 href 或 xlink:href 指向 javascript: 或某些不安全的 data: URI
270 if key == "href" || key == "xlink:href" || key == "xlinkhref" {
271 if strings.HasPrefix(val, "javascript:") {
272 continue
273 }
274 // 对 data: 做保守处理,删除包含可执行内容的 data:text/html 或 data:image/svg+xml
275 if strings.HasPrefix(val, "data:") {
276 if strings.Contains(val, "text/html") || strings.Contains(val, "image/svg+xml") || strings.Contains(val, "application/xhtml+xml") {
277 continue
278 }
279 }
280 }
281
282 // 清理 style 中的危险表达式,如 expression() 或 url(javascript:...)
283 if key == "style" {
284 low := val
285 if strings.Contains(low, "expression(") || strings.Contains(low, "url(javascript:") || strings.Contains(low, "javascript:") {
286 // 丢弃整个 style 属性以保证安全
287 continue
288 }
289 }
290
291 // 其它属性保留
292 filtered = append(filtered, a)
293 }
294 c.Attr = filtered
295 }
296 }
297
298 // 递归处理子节点(如果节点尚未被删除)
299 if c.Parent != nil {
300 walk(c)
301 }
302
303 c = next
304 }
305 }
306
307 // 3. 执行移除
308 walk(doc)
309
310 // 4. 将处理后的树重新渲染回字符串
311 var buf bytes.Buffer
312 if err = html.Render(&buf, doc); err != nil {
313 logging.LogWarnf("render svg failed: %v", err)
314 return svgInput
315 }
316
317 // 5. 提取 SVG 部分 (html.Render 会自动加上 <html><body> 标签)
318 return extractSVG(buf.String())
319}
320
// extractSVG returns the part of fullHTML that starts at the first "<svg"
// and ends with the last "</svg>". If either marker is missing, or the last
// closing tag lies before the first opening tag, fullHTML is returned
// unchanged (the latter case used to panic with a slice-bounds error).
func extractSVG(fullHTML string) string {
	const closeTag = "</svg>"

	start := strings.Index(fullHTML, "<svg")
	end := strings.LastIndex(fullHTML, closeTag)
	if start == -1 || end < start {
		return fullHTML
	}
	return fullHTML[start : end+len(closeTag)]
}
328}