A privacy-first, self-hosted, fully open-source personal knowledge management system, written in TypeScript and Go. (PERSONAL FORK)
1// SiYuan - Refactor your thinking
2// Copyright (c) 2020-present, b3log.org
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Affero General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU Affero General Public License for more details.
13//
14// You should have received a copy of the GNU Affero General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17package model
18
19import (
20 "bytes"
21 "encoding/base64"
22 "encoding/json"
23 "errors"
24 "fmt"
25 "image"
26 "image/jpeg"
27 "image/png"
28 "io"
29 "io/fs"
30 "net/url"
31 "os"
32 "path"
33 "path/filepath"
34 "regexp"
35 "runtime/debug"
36 "sort"
37 "strings"
38
39 "github.com/88250/gulu"
40 "github.com/88250/lute"
41 "github.com/88250/lute/ast"
42 "github.com/88250/lute/html"
43 "github.com/88250/lute/html/atom"
44 "github.com/88250/lute/parse"
45 "github.com/88250/lute/render"
46 util2 "github.com/88250/lute/util"
47 "github.com/siyuan-note/dataparser"
48 "github.com/siyuan-note/filelock"
49 "github.com/siyuan-note/logging"
50 "github.com/siyuan-note/riff"
51 "github.com/siyuan-note/siyuan/kernel/av"
52 "github.com/siyuan-note/siyuan/kernel/filesys"
53 "github.com/siyuan-note/siyuan/kernel/sql"
54 "github.com/siyuan-note/siyuan/kernel/task"
55 "github.com/siyuan-note/siyuan/kernel/treenode"
56 "github.com/siyuan-note/siyuan/kernel/util"
57)
58
59func HTML2Markdown(htmlStr string, luteEngine *lute.Lute) (markdown string, withMath bool, err error) {
60 tree, withMath := HTML2Tree(htmlStr, luteEngine)
61
62 var formatted []byte
63 renderer := render.NewFormatRenderer(tree, luteEngine.RenderOptions)
64 for nodeType, rendererFunc := range luteEngine.HTML2MdRendererFuncs {
65 renderer.ExtRendererFuncs[nodeType] = rendererFunc
66 }
67 formatted = renderer.Render()
68 markdown = gulu.Str.FromBytes(formatted)
69 return
70}
71
72func HTML2Tree(htmlStr string, luteEngine *lute.Lute) (tree *parse.Tree, withMath bool) {
73 htmlStr = gulu.Str.RemovePUA(htmlStr)
74 assetDirPath := filepath.Join(util.DataDir, "assets")
75 tree = luteEngine.HTML2Tree(htmlStr)
76 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
77 if !entering {
78 return ast.WalkContinue
79 }
80
81 if ast.NodeText == n.Type {
82 if n.ParentIs(ast.NodeTableCell) {
83 n.Tokens = bytes.ReplaceAll(n.Tokens, []byte("\\|"), []byte("|"))
84 n.Tokens = bytes.ReplaceAll(n.Tokens, []byte("|"), []byte("\\|"))
85 n.Tokens = bytes.ReplaceAll(n.Tokens, []byte("\\<br /\\>"), []byte("<br />"))
86 }
87 }
88
89 if ast.NodeInlineMath == n.Type {
90 withMath = true
91 return ast.WalkContinue
92 }
93
94 if ast.NodeLinkDest != n.Type {
95 return ast.WalkContinue
96 }
97
98 dest := n.TokensStr()
99 if strings.HasPrefix(dest, "data:image") && strings.Contains(dest, ";base64,") {
100 processBase64Img(n, dest, assetDirPath)
101 return ast.WalkContinue
102 }
103 return ast.WalkContinue
104 })
105 return
106}
107
108func ImportSY(zipPath, boxID, toPath string) (err error) {
109 util.PushEndlessProgress(Conf.Language(73))
110 defer util.ClearPushProgress(100)
111
112 lockSync()
113 defer unlockSync()
114
115 baseName := filepath.Base(zipPath)
116 ext := filepath.Ext(baseName)
117 baseName = strings.TrimSuffix(baseName, ext)
118 unzipPath := filepath.Join(filepath.Dir(zipPath), baseName+"-"+gulu.Rand.String(7))
119 err = gulu.Zip.Unzip(zipPath, unzipPath)
120 if err != nil {
121 return
122 }
123 defer os.RemoveAll(unzipPath)
124
125 var syPaths []string
126 filelock.Walk(unzipPath, func(path string, d fs.DirEntry, err error) error {
127 if err != nil {
128 return err
129 }
130 if d == nil {
131 return nil
132 }
133 if !d.IsDir() && strings.HasSuffix(d.Name(), ".sy") {
134 syPaths = append(syPaths, path)
135 }
136 return nil
137 })
138
139 entries, err := os.ReadDir(unzipPath)
140 if err != nil {
141 logging.LogErrorf("read unzip dir [%s] failed: %s", unzipPath, err)
142 return
143 }
144 if 1 != len(entries) {
145 logging.LogErrorf("invalid .sy.zip [%v]", entries)
146 return errors.New(Conf.Language(199))
147 }
148 unzipRootPath := filepath.Join(unzipPath, entries[0].Name())
149 name := filepath.Base(unzipRootPath)
150 if strings.HasPrefix(name, "data-20") && len("data-20230321175442") == len(name) {
151 logging.LogErrorf("invalid .sy.zip [unzipRootPath=%s, baseName=%s]", unzipRootPath, name)
152 return errors.New(Conf.Language(199))
153 }
154
155 luteEngine := util.NewLute()
156 blockIDs := map[string]string{}
157 trees := map[string]*parse.Tree{}
158
159 // 重新生成块 ID
160 for i, syPath := range syPaths {
161 data, readErr := os.ReadFile(syPath)
162 if nil != readErr {
163 logging.LogErrorf("read .sy [%s] failed: %s", syPath, readErr)
164 err = readErr
165 return
166 }
167 tree, _, parseErr := dataparser.ParseJSON(data, luteEngine.ParseOptions)
168 if nil != parseErr {
169 logging.LogErrorf("parse .sy [%s] failed: %s", syPath, parseErr)
170 err = parseErr
171 return
172 }
173 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
174 if !entering || "" == n.ID {
175 return ast.WalkContinue
176 }
177
178 // 新 ID 保留时间部分,仅修改随机值,避免时间变化导致更新时间早于创建时间
179 // Keep original creation time when importing .sy.zip https://github.com/siyuan-note/siyuan/issues/9923
180 newNodeID := util.TimeFromID(n.ID) + "-" + util.RandString(7)
181 blockIDs[n.ID] = newNodeID
182 n.ID = newNodeID
183 n.SetIALAttr("id", newNodeID)
184
185 if icon := n.IALAttr("icon"); "" != icon {
186 // XSS through emoji name https://github.com/siyuan-note/siyuan/issues/15034
187 icon = util.FilterUploadEmojiFileName(icon)
188 n.SetIALAttr("icon", icon)
189 }
190
191 return ast.WalkContinue
192 })
193 tree.ID = tree.Root.ID
194 tree.Path = filepath.ToSlash(strings.TrimPrefix(syPath, unzipRootPath))
195 trees[tree.ID] = tree
196 util.PushEndlessProgress(Conf.language(73) + " " + fmt.Sprintf(Conf.language(70), fmt.Sprintf("%d/%d", i+1, len(syPaths))))
197 }
198
199 // 引用和嵌入指向重新生成的块 ID
200 for _, tree := range trees {
201 util.PushEndlessProgress(Conf.language(73) + " " + fmt.Sprintf(Conf.language(70), tree.Root.IALAttr("title")))
202 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
203 if !entering {
204 return ast.WalkContinue
205 }
206
207 if treenode.IsBlockRef(n) {
208 defID, _, _ := treenode.GetBlockRef(n)
209 newDefID := blockIDs[defID]
210 if "" != newDefID {
211 n.TextMarkBlockRefID = newDefID
212 }
213 } else if ast.NodeTextMark == n.Type && n.IsTextMarkType("a") && strings.HasPrefix(n.TextMarkAHref, "siyuan://blocks/") {
214 // Block hyperlinks do not point to regenerated block IDs when importing .sy.zip https://github.com/siyuan-note/siyuan/issues/9083
215 defID := strings.TrimPrefix(n.TextMarkAHref, "siyuan://blocks/")
216 newDefID := blockIDs[defID]
217 if "" != newDefID {
218 n.TextMarkAHref = "siyuan://blocks/" + newDefID
219 }
220 } else if ast.NodeBlockQueryEmbedScript == n.Type {
221 for oldID, newID := range blockIDs {
222 // 导入 `.sy.zip` 后查询嵌入块失效 https://github.com/siyuan-note/siyuan/issues/5316
223 n.Tokens = bytes.ReplaceAll(n.Tokens, []byte(oldID), []byte(newID))
224 }
225 }
226 return ast.WalkContinue
227 })
228 }
229
230 var replacements []string
231 for oldID, newID := range blockIDs {
232 replacements = append(replacements, oldID, newID)
233 }
234 blockIDReplacer := strings.NewReplacer(replacements...)
235
236 // 将关联的数据库文件移动到 data/storage/av/ 下
237 storage := filepath.Join(unzipRootPath, "storage")
238 storageAvDir := filepath.Join(storage, "av")
239 avIDs := map[string]string{}
240 renameAvPaths := map[string]string{}
241 if gulu.File.IsExist(storageAvDir) {
242 // 重新生成数据库数据
243 filelock.Walk(storageAvDir, func(path string, d fs.DirEntry, err error) error {
244 if err != nil {
245 return err
246 }
247 if d == nil {
248 return nil
249 }
250 if !strings.HasSuffix(path, ".json") || !ast.IsNodeIDPattern(strings.TrimSuffix(d.Name(), ".json")) {
251 return nil
252 }
253
254 // 重命名数据库
255 newAvID := ast.NewNodeID()
256 oldAvID := strings.TrimSuffix(d.Name(), ".json")
257 newPath := filepath.Join(filepath.Dir(path), newAvID+".json")
258 renameAvPaths[path] = newPath
259 avIDs[oldAvID] = newAvID
260 return nil
261 })
262
263 // 重命名数据库文件
264 for oldPath, newPath := range renameAvPaths {
265 data, readErr := os.ReadFile(oldPath)
266 if nil != readErr {
267 logging.LogErrorf("read av file [%s] failed: %s", oldPath, readErr)
268 return nil
269 }
270
271 // 将数据库文件中的 ID 替换为新的 ID
272 newData := data
273 for oldAvID, newAvID := range avIDs {
274 newData = bytes.ReplaceAll(newData, []byte(oldAvID), []byte(newAvID))
275 }
276 newData = []byte(blockIDReplacer.Replace(string(newData)))
277 if !bytes.Equal(data, newData) {
278 if writeErr := os.WriteFile(oldPath, newData, 0644); nil != writeErr {
279 logging.LogErrorf("write av file [%s] failed: %s", oldPath, writeErr)
280 return nil
281 }
282 }
283
284 if err = os.Rename(oldPath, newPath); err != nil {
285 logging.LogErrorf("rename av file from [%s] to [%s] failed: %s", oldPath, newPath, err)
286 return
287 }
288 }
289
290 targetStorageAvDir := filepath.Join(util.DataDir, "storage", "av")
291 if copyErr := filelock.Copy(storageAvDir, targetStorageAvDir); nil != copyErr {
292 logging.LogErrorf("copy storage av dir from [%s] to [%s] failed: %s", storageAvDir, targetStorageAvDir, copyErr)
293 }
294
295 // 重新指向数据库属性值
296 for _, tree := range trees {
297 util.PushEndlessProgress(Conf.language(73) + " " + fmt.Sprintf(Conf.language(70), tree.Root.IALAttr("title")))
298 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
299 if !entering || "" == n.ID {
300 return ast.WalkContinue
301 }
302
303 ial := parse.IAL2Map(n.KramdownIAL)
304 for k, v := range ial {
305 if strings.HasPrefix(k, av.NodeAttrNameAvs) {
306 newKey, newVal := k, v
307 for oldAvID, newAvID := range avIDs {
308 newKey = strings.ReplaceAll(newKey, oldAvID, newAvID)
309 newVal = strings.ReplaceAll(newVal, oldAvID, newAvID)
310 }
311 n.RemoveIALAttr(k)
312 n.SetIALAttr(newKey, newVal)
313 }
314 }
315
316 if ast.NodeAttributeView == n.Type {
317 n.AttributeViewID = avIDs[n.AttributeViewID]
318 }
319 return ast.WalkContinue
320 })
321
322 // 关联数据库和块
323 avNodes := tree.Root.ChildrenByType(ast.NodeAttributeView)
324 av.BatchUpsertBlockRel(avNodes)
325 }
326
327 // 如果数据库中绑定的块不在导入的文档中,则需要单独更新这些绑定块的属性
328 var attrViewIDs []string
329 for _, avID := range avIDs {
330 attrViewIDs = append(attrViewIDs, avID)
331 }
332 updateBoundBlockAvsAttribute(attrViewIDs)
333
334 // 插入关联关系 https://github.com/siyuan-note/siyuan/issues/11628
335 relationAvs := map[string]string{}
336 for _, avID := range avIDs {
337 attrView, _ := av.ParseAttributeView(avID)
338 if nil == attrView {
339 continue
340 }
341
342 for _, keyValues := range attrView.KeyValues {
343 if nil != keyValues.Key && av.KeyTypeRelation == keyValues.Key.Type && nil != keyValues.Key.Relation {
344 relationAvs[avID] = keyValues.Key.Relation.AvID
345 }
346 }
347 }
348
349 for srcAvID, destAvID := range relationAvs {
350 av.UpsertAvBackRel(srcAvID, destAvID)
351 }
352 }
353
354 // 将关联的闪卡数据合并到默认卡包 data/storage/riff/20230218211946-2kw8jgx 中
355 storageRiffDir := filepath.Join(storage, "riff")
356 if gulu.File.IsExist(storageRiffDir) {
357 deckToImport, loadErr := riff.LoadDeck(storageRiffDir, builtinDeckID, Conf.Flashcard.RequestRetention, Conf.Flashcard.MaximumInterval, Conf.Flashcard.Weights)
358 if nil != loadErr {
359 logging.LogErrorf("load deck [%s] failed: %s", name, loadErr)
360 } else {
361 deck := Decks[builtinDeckID]
362 if nil == deck {
363 var createErr error
364 deck, createErr = createDeck0("Built-in Deck", builtinDeckID)
365 if nil == createErr {
366 Decks[deck.ID] = deck
367 }
368 }
369
370 bIDs := deckToImport.GetBlockIDs()
371 cards := deckToImport.GetCardsByBlockIDs(bIDs)
372 for _, card := range cards {
373 deck.AddCard(ast.NewNodeID(), blockIDs[card.BlockID()])
374 }
375
376 if 0 < len(cards) {
377 if saveErr := deck.Save(); nil != saveErr {
378 logging.LogErrorf("save deck [%s] failed: %s", name, saveErr)
379 }
380 }
381 }
382 }
383
384 // storage 文件夹已在上方处理,所以这里删除源 storage 文件夹,避免后面被拷贝到导入目录下 targetDir
385 if removeErr := os.RemoveAll(storage); nil != removeErr {
386 logging.LogErrorf("remove temp storage av dir failed: %s", removeErr)
387 }
388
389 if 1 > len(avIDs) { // 如果本次没有导入数据库,则清理掉文档中的数据库属性 https://github.com/siyuan-note/siyuan/issues/13011
390 for _, tree := range trees {
391 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
392 if !entering || !n.IsBlock() {
393 return ast.WalkContinue
394 }
395
396 n.RemoveIALAttr(av.NodeAttrNameAvs)
397 return ast.WalkContinue
398 })
399 }
400 }
401
402 // 写回 .sy
403 for _, tree := range trees {
404 util.PushEndlessProgress(Conf.language(73) + " " + fmt.Sprintf(Conf.language(70), tree.Root.IALAttr("title")))
405 syPath := filepath.Join(unzipRootPath, tree.Path)
406 if "" == tree.Root.Spec {
407 parse.NestedInlines2FlattedSpans(tree, false)
408 tree.Root.Spec = "1"
409 }
410 renderer := render.NewJSONRenderer(tree, luteEngine.RenderOptions)
411 data := renderer.Render()
412
413 if !util.UseSingleLineSave {
414 buf := bytes.Buffer{}
415 buf.Grow(1024 * 1024 * 2)
416 if err = json.Indent(&buf, data, "", "\t"); err != nil {
417 return
418 }
419 data = buf.Bytes()
420 }
421
422 if err = os.WriteFile(syPath, data, 0644); err != nil {
423 logging.LogErrorf("write .sy [%s] failed: %s", syPath, err)
424 return
425 }
426 newSyPath := filepath.Join(filepath.Dir(syPath), tree.ID+".sy")
427 if err = filelock.Rename(syPath, newSyPath); err != nil {
428 logging.LogErrorf("rename .sy from [%s] to [%s] failed: %s", syPath, newSyPath, err)
429 return
430 }
431 }
432
433 // 合并 sort.json
434 fullSortIDs := map[string]int{}
435 sortIDs := map[string]int{}
436 var sortData []byte
437 var sortErr error
438 sortPath := filepath.Join(unzipRootPath, ".siyuan", "sort.json")
439 if filelock.IsExist(sortPath) {
440 sortData, sortErr = filelock.ReadFile(sortPath)
441 if nil != sortErr {
442 logging.LogErrorf("read import sort conf failed: %s", sortErr)
443 }
444
445 if sortErr = gulu.JSON.UnmarshalJSON(sortData, &sortIDs); nil != sortErr {
446 logging.LogErrorf("unmarshal sort conf failed: %s", sortErr)
447 }
448
449 boxSortPath := filepath.Join(util.DataDir, boxID, ".siyuan", "sort.json")
450 if filelock.IsExist(boxSortPath) {
451 sortData, sortErr = filelock.ReadFile(boxSortPath)
452 if nil != sortErr {
453 logging.LogErrorf("read box sort conf failed: %s", sortErr)
454 }
455
456 if sortErr = gulu.JSON.UnmarshalJSON(sortData, &fullSortIDs); nil != sortErr {
457 logging.LogErrorf("unmarshal box sort conf failed: %s", sortErr)
458 }
459 }
460
461 for oldID, sort := range sortIDs {
462 if newID := blockIDs[oldID]; "" != newID {
463 fullSortIDs[newID] = sort
464 }
465 }
466
467 sortData, sortErr = gulu.JSON.MarshalJSON(fullSortIDs)
468 if nil != sortErr {
469 logging.LogErrorf("marshal box full sort conf failed: %s", sortErr)
470 } else {
471 sortErr = filelock.WriteFile(boxSortPath, sortData)
472 if nil != sortErr {
473 logging.LogErrorf("write box full sort conf failed: %s", sortErr)
474 }
475 }
476 if removeErr := os.RemoveAll(sortPath); nil != removeErr {
477 logging.LogErrorf("remove temp sort conf failed: %s", removeErr)
478 }
479 }
480
481 // 重命名文件路径
482 renamePaths := map[string]string{}
483 filelock.Walk(unzipRootPath, func(path string, d fs.DirEntry, err error) error {
484 if err != nil {
485 return err
486 }
487 if d == nil {
488 return nil
489 }
490 if d.IsDir() && ast.IsNodeIDPattern(d.Name()) {
491 renamePaths[path] = path
492 }
493 return nil
494 })
495 for p, _ := range renamePaths {
496 originalPath := p
497 p = strings.TrimPrefix(p, unzipRootPath)
498 p = filepath.ToSlash(p)
499 parts := strings.Split(p, "/")
500 buf := bytes.Buffer{}
501 buf.WriteString("/")
502 for i, part := range parts {
503 if "" == part {
504 continue
505 }
506 newNodeID := blockIDs[part]
507 if "" != newNodeID {
508 buf.WriteString(newNodeID)
509 } else {
510 buf.WriteString(part)
511 }
512 if i < len(parts)-1 {
513 buf.WriteString("/")
514 }
515 }
516 newPath := buf.String()
517 renamePaths[originalPath] = filepath.Join(unzipRootPath, newPath)
518 }
519
520 var oldPaths []string
521 for oldPath, _ := range renamePaths {
522 oldPaths = append(oldPaths, oldPath)
523 }
524 sort.Slice(oldPaths, func(i, j int) bool {
525 return strings.Count(oldPaths[i], string(os.PathSeparator)) < strings.Count(oldPaths[j], string(os.PathSeparator))
526 })
527 for i, oldPath := range oldPaths {
528 newPath := renamePaths[oldPath]
529 if err = filelock.Rename(oldPath, newPath); err != nil {
530 logging.LogErrorf("rename path from [%s] to [%s] failed: %s", oldPath, renamePaths[oldPath], err)
531 return errors.New("rename path failed")
532 }
533
534 delete(renamePaths, oldPath)
535 var toRemoves []string
536 newRenamedPaths := map[string]string{}
537 for oldP, newP := range renamePaths {
538 if strings.HasPrefix(oldP, oldPath) {
539 renamedOldP := strings.Replace(oldP, oldPath, newPath, 1)
540 newRenamedPaths[renamedOldP] = newP
541 toRemoves = append(toRemoves, oldPath)
542 }
543 }
544 for _, toRemove := range toRemoves {
545 delete(renamePaths, toRemove)
546 }
547 for oldP, newP := range newRenamedPaths {
548 renamePaths[oldP] = newP
549 }
550 for j := i + 1; j < len(oldPaths); j++ {
551 if strings.HasPrefix(oldPaths[j], oldPath) {
552 renamedOldP := strings.Replace(oldPaths[j], oldPath, newPath, 1)
553 oldPaths[j] = renamedOldP
554 }
555 }
556 }
557
558 // 将包含的资源文件统一移动到 data/assets/ 下
559 var assetsDirs []string
560 filelock.Walk(unzipRootPath, func(path string, d fs.DirEntry, err error) error {
561 if err != nil {
562 return err
563 }
564 if d == nil {
565 return nil
566 }
567 if strings.Contains(path, "assets") && d.IsDir() {
568 assetsDirs = append(assetsDirs, path)
569 }
570 return nil
571 })
572 dataAssets := filepath.Join(util.DataDir, "assets")
573 for _, assets := range assetsDirs {
574 if gulu.File.IsDir(assets) {
575 if err = filelock.Copy(assets, dataAssets); err != nil {
576 logging.LogErrorf("copy assets from [%s] to [%s] failed: %s", assets, dataAssets, err)
577 return
578 }
579 }
580 os.RemoveAll(assets)
581 }
582
583 // 将包含的自定义表情统一移动到 data/emojis/ 下
584 unzipRootEmojisPath := filepath.Join(unzipRootPath, "emojis")
585 filelock.Walk(unzipRootEmojisPath, func(path string, d fs.DirEntry, err error) error {
586 if err != nil {
587 return err
588 }
589 if d == nil {
590 return nil
591 }
592 if !util.IsValidUploadFileName(d.Name()) {
593 emojiFullName := path
594 fullPathFilteredName := filepath.Join(filepath.Dir(path), util.FilterUploadEmojiFileName(d.Name()))
595 // XSS through emoji name https://github.com/siyuan-note/siyuan/issues/15034
596 logging.LogWarnf("renaming invalid custom emoji file [%s] to [%s]", d.Name(), fullPathFilteredName)
597 if removeErr := filelock.Rename(emojiFullName, fullPathFilteredName); nil != removeErr {
598 logging.LogErrorf("renaming invalid custom emoji file to [%s] failed: %s", fullPathFilteredName, removeErr)
599 }
600 }
601 return nil
602 })
603 var emojiDirs []string
604 filelock.Walk(unzipRootPath, func(path string, d fs.DirEntry, err error) error {
605 if err != nil {
606 return err
607 }
608 if d == nil {
609 return nil
610 }
611 if strings.Contains(path, "emojis") && d.IsDir() {
612 emojiDirs = append(emojiDirs, path)
613 }
614 return nil
615 })
616 dataEmojis := filepath.Join(util.DataDir, "emojis")
617 for _, emojis := range emojiDirs {
618 if gulu.File.IsDir(emojis) {
619 if err = filelock.Copy(emojis, dataEmojis); err != nil {
620 logging.LogErrorf("copy emojis from [%s] to [%s] failed: %s", emojis, dataEmojis, err)
621 return
622 }
623 }
624 os.RemoveAll(emojis)
625 }
626
627 var baseTargetPath string
628 if "/" == toPath {
629 baseTargetPath = "/"
630 } else {
631 block := treenode.GetBlockTreeRootByPath(boxID, toPath)
632 if nil == block {
633 logging.LogErrorf("not found block by path [%s]", toPath)
634 return nil
635 }
636 baseTargetPath = strings.TrimSuffix(block.Path, ".sy")
637 }
638
639 targetDir := filepath.Join(util.DataDir, boxID, baseTargetPath)
640 if err = os.MkdirAll(targetDir, 0755); err != nil {
641 return
642 }
643
644 var treePaths []string
645 filelock.Walk(unzipRootPath, func(path string, d fs.DirEntry, err error) error {
646 if err != nil {
647 return err
648 }
649 if d == nil {
650 return nil
651 }
652 if d.IsDir() {
653 if strings.HasPrefix(d.Name(), ".") {
654 return filepath.SkipDir
655 }
656 return nil
657 }
658
659 if !strings.HasSuffix(d.Name(), ".sy") {
660 return nil
661 }
662
663 p := strings.TrimPrefix(path, unzipRootPath)
664 p = filepath.ToSlash(p)
665 treePaths = append(treePaths, p)
666 return nil
667 })
668
669 if err = filelock.Copy(unzipRootPath, targetDir); err != nil {
670 logging.LogErrorf("copy data dir from [%s] to [%s] failed: %s", unzipRootPath, util.DataDir, err)
671 err = errors.New("copy data failed")
672 return
673 }
674
675 boxAbsPath := filepath.Join(util.DataDir, boxID)
676 for _, treePath := range treePaths {
677 absPath := filepath.Join(targetDir, treePath)
678 p := strings.TrimPrefix(absPath, boxAbsPath)
679 p = filepath.ToSlash(p)
680 tree, err := filesys.LoadTree(boxID, p, luteEngine)
681 if err != nil {
682 logging.LogErrorf("load tree [%s] failed: %s", treePath, err)
683 continue
684 }
685
686 treenode.IndexBlockTree(tree)
687 sql.IndexTreeQueue(tree)
688 util.PushEndlessProgress(Conf.language(73) + " " + fmt.Sprintf(Conf.language(70), tree.Root.IALAttr("title")))
689 }
690
691 IncSync()
692
693 task.AppendTask(task.UpdateIDs, util.PushUpdateIDs, blockIDs)
694 return
695}
696
697func ImportData(zipPath string) (err error) {
698 util.PushEndlessProgress(Conf.Language(73))
699 defer util.ClearPushProgress(100)
700
701 lockSync()
702 defer unlockSync()
703
704 logging.LogInfof("import data from [%s]", zipPath)
705 baseName := filepath.Base(zipPath)
706 ext := filepath.Ext(baseName)
707 baseName = strings.TrimSuffix(baseName, ext)
708 unzipPath := filepath.Join(filepath.Dir(zipPath), baseName)
709 err = gulu.Zip.Unzip(zipPath, unzipPath)
710 if err != nil {
711 return
712 }
713 defer os.RemoveAll(unzipPath)
714
715 files, err := filepath.Glob(filepath.Join(unzipPath, "*/*.sy"))
716 if err != nil {
717 logging.LogErrorf("check data.zip failed: %s", err)
718 return errors.New("check data.zip failed")
719 }
720 if 0 < len(files) {
721 return errors.New(Conf.Language(198))
722 }
723 dirs, err := os.ReadDir(unzipPath)
724 if err != nil {
725 logging.LogErrorf("check data.zip failed: %s", err)
726 return errors.New("check data.zip failed")
727 }
728 if 1 != len(dirs) {
729 return errors.New(Conf.Language(198))
730 }
731
732 tmpDataPath := filepath.Join(unzipPath, dirs[0].Name())
733 tmpDataEmojisPath := filepath.Join(tmpDataPath, "emojis")
734 filelock.Walk(tmpDataEmojisPath, func(path string, d fs.DirEntry, err error) error {
735 if err != nil {
736 return err
737 }
738 if d == nil {
739 return nil
740 }
741 if !util.IsValidUploadFileName(d.Name()) {
742 emojiFullName := path
743 fullPathFilteredName := filepath.Join(filepath.Dir(path), util.FilterUploadEmojiFileName(d.Name()))
744 // XSS through emoji name https://github.com/siyuan-note/siyuan/issues/15034
745 logging.LogWarnf("renaming invalid custom emoji file [%s] to [%s]", d.Name(), fullPathFilteredName)
746 if removeErr := filelock.Rename(emojiFullName, fullPathFilteredName); nil != removeErr {
747 logging.LogErrorf("renaming invalid custom emoji file to [%s] failed: %s", fullPathFilteredName, removeErr)
748 }
749 }
750 return nil
751 })
752 if err = filelock.Copy(tmpDataPath, util.DataDir); err != nil {
753 logging.LogErrorf("copy data dir from [%s] to [%s] failed: %s", tmpDataPath, util.DataDir, err)
754 err = errors.New("copy data failed")
755 return
756 }
757
758 logging.LogInfof("import data from [%s] done", zipPath)
759 IncSync()
760 FullReindex()
761 return
762}
763
764func ImportFromLocalPath(boxID, localPath string, toPath string) (err error) {
765 util.PushEndlessProgress(Conf.Language(73))
766 defer func() {
767 util.PushClearProgress()
768
769 if e := recover(); nil != e {
770 stack := debug.Stack()
771 msg := fmt.Sprintf("PANIC RECOVERED: %v\n\t%s\n", e, stack)
772 logging.LogErrorf("import from local path failed: %s", msg)
773 err = errors.New("import from local path failed, please check kernel log for details")
774 }
775 }()
776
777 lockSync()
778 defer unlockSync()
779
780 FlushTxQueue()
781
782 var baseHPath, baseTargetPath, boxLocalPath string
783 if "/" == toPath {
784 baseHPath = "/"
785 baseTargetPath = "/"
786 } else {
787 block := treenode.GetBlockTreeRootByPath(boxID, toPath)
788 if nil == block {
789 logging.LogErrorf("not found block by path [%s]", toPath)
790 return nil
791 }
792 baseHPath = block.HPath
793 baseTargetPath = strings.TrimSuffix(block.Path, ".sy")
794 }
795 boxLocalPath = filepath.Join(util.DataDir, boxID)
796
797 hPathsIDs := map[string]string{}
798 idPaths := map[string]string{}
799 moveIDs := map[string]string{}
800 assetsDone := map[string]string{}
801 if gulu.File.IsDir(localPath) { // 导入文件夹
802 // 收集所有资源文件
803 assets := map[string]string{}
804 filelock.Walk(localPath, func(currentPath string, d fs.DirEntry, err error) error {
805 if err != nil {
806 return err
807 }
808 if d == nil {
809 return nil
810 }
811 if localPath == currentPath {
812 return nil
813 }
814 if strings.HasPrefix(d.Name(), ".") {
815 if d.IsDir() {
816 return filepath.SkipDir
817 }
818 return nil
819 }
820
821 if !strings.HasSuffix(d.Name(), ".md") && !strings.HasSuffix(d.Name(), ".markdown") {
822 assets[currentPath] = currentPath
823 return nil
824 }
825 return nil
826 })
827
828 targetPaths := map[string]string{}
829 count := 0
830 // md 转换 sy
831 filelock.Walk(localPath, func(currentPath string, d fs.DirEntry, err error) error {
832 if err != nil {
833 return err
834 }
835 if d == nil {
836 return nil
837 }
838 if strings.HasPrefix(d.Name(), ".") {
839 if d.IsDir() {
840 return filepath.SkipDir
841 }
842 return nil
843 }
844
845 var tree *parse.Tree
846 var ext string
847 title := d.Name()
848 if !d.IsDir() {
849 ext = util.Ext(d.Name())
850 title = strings.TrimSuffix(d.Name(), ext)
851 }
852 id := ast.NewNodeID()
853
854 curRelPath := filepath.ToSlash(strings.TrimPrefix(currentPath, localPath))
855 targetPath := path.Join(baseTargetPath, id)
856 hPath := path.Join(baseHPath, filepath.Base(localPath), filepath.ToSlash(strings.TrimPrefix(currentPath, localPath)))
857 hPath = strings.TrimSuffix(hPath, ext)
858 if "" == curRelPath {
859 curRelPath = "/"
860 hPath = "/" + title
861 } else {
862 dirPath := targetPaths[path.Dir(curRelPath)]
863 targetPath = path.Join(dirPath, id)
864 }
865
866 targetPath = strings.ReplaceAll(targetPath, ".sy/", "/")
867 targetPath += ".sy"
868 if _, ok := targetPaths[curRelPath]; !ok {
869 targetPaths[curRelPath] = targetPath
870 } else {
871 targetPath = targetPaths[curRelPath]
872 id = util.GetTreeID(targetPath)
873 }
874
875 if d.IsDir() {
876 if subMdFiles := util.GetFilePathsByExts(currentPath, []string{".md", ".markdown"}); 1 > len(subMdFiles) {
877 // 如果该文件夹中不包含 Markdown 文件则不处理 https://github.com/siyuan-note/siyuan/issues/11567
878 return nil
879 }
880
881 // 如果当前文件夹路径下包含同名的 Markdown 文件,则不创建空文档 https://github.com/siyuan-note/siyuan/issues/13149
882 if gulu.File.IsExist(currentPath+".md") || gulu.File.IsExist(currentPath+".markdown") {
883 targetPaths[curRelPath+".md"] = targetPath
884 return nil
885 }
886
887 tree = treenode.NewTree(boxID, targetPath, hPath, title)
888 importTrees = append(importTrees, tree)
889 return nil
890 }
891
892 if !strings.HasSuffix(d.Name(), ".md") && !strings.HasSuffix(d.Name(), ".markdown") {
893 return nil
894 }
895
896 data, readErr := os.ReadFile(currentPath)
897 if nil != readErr {
898 err = readErr
899 return io.EOF
900 }
901
902 tree, yfmRootID, yfmTitle, yfmUpdated := parseStdMd(data)
903 if nil == tree {
904 logging.LogErrorf("parse tree [%s] failed", currentPath)
905 return nil
906 }
907
908 if "" != yfmRootID {
909 moveIDs[id] = yfmRootID
910 id = yfmRootID
911 }
912 if "" != yfmTitle {
913 title = yfmTitle
914 }
915 unescapedTitle, unescapeErr := url.PathUnescape(title)
916 if nil == unescapeErr {
917 title = unescapedTitle
918 }
919 hPath = path.Join(path.Dir(hPath), title)
920 updated := yfmUpdated
921 fname := path.Base(targetPath)
922 targetPath = strings.ReplaceAll(targetPath, fname, id+".sy")
923 targetPaths[curRelPath] = targetPath
924
925 tree.ID = id
926 tree.Root.ID = id
927 tree.Root.SetIALAttr("id", tree.Root.ID)
928 tree.Root.SetIALAttr("title", title)
929 tree.Box = boxID
930 targetPath = path.Join(path.Dir(targetPath), tree.Root.ID+".sy")
931 tree.Path = targetPath
932 targetPaths[curRelPath] = targetPath
933 tree.HPath = hPath
934 tree.Root.Spec = "1"
935
936 docDirLocalPath := filepath.Dir(filepath.Join(boxLocalPath, targetPath))
937 assetDirPath := getAssetsDir(boxLocalPath, docDirLocalPath)
938 currentDir := filepath.Dir(currentPath)
939 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
940 if !entering || (ast.NodeLinkDest != n.Type && !n.IsTextMarkType("a")) {
941 return ast.WalkContinue
942 }
943
944 var dest string
945 if ast.NodeLinkDest == n.Type {
946 dest = n.TokensStr()
947 } else {
948 dest = n.TextMarkAHref
949 }
950
951 if strings.HasPrefix(dest, "data:image") && strings.Contains(dest, ";base64,") {
952 processBase64Img(n, dest, assetDirPath)
953 return ast.WalkContinue
954 }
955
956 decodedDest := string(html.DecodeDestination([]byte(dest)))
957 if decodedDest != dest {
958 dest = decodedDest
959 }
960 absolutePath := filepath.Join(currentDir, dest)
961
962 if ast.NodeLinkDest == n.Type {
963 n.Tokens = []byte(dest)
964 } else {
965 n.TextMarkAHref = dest
966 }
967 if !util.IsRelativePath(dest) {
968 return ast.WalkContinue
969 }
970 dest = filepath.ToSlash(dest)
971 if "" == dest {
972 return ast.WalkContinue
973 }
974
975 if !gulu.File.IsExist(absolutePath) {
976 return ast.WalkContinue
977 }
978
979 existName := assetsDone[absolutePath]
980 var name string
981 if "" == existName {
982 name = filepath.Base(absolutePath)
983 name = util.FilterUploadFileName(name)
984 name = util.AssetName(name, ast.NewNodeID())
985 assetTargetPath := filepath.Join(assetDirPath, name)
986 if err = filelock.Copy(absolutePath, assetTargetPath); err != nil {
987 logging.LogErrorf("copy asset from [%s] to [%s] failed: %s", absolutePath, assetTargetPath, err)
988 return ast.WalkContinue
989 }
990 assetsDone[absolutePath] = name
991 } else {
992 name = existName
993 }
994 if ast.NodeLinkDest == n.Type {
995 n.Tokens = []byte("assets/" + name)
996 } else {
997 n.TextMarkAHref = "assets/" + name
998 }
999 return ast.WalkContinue
1000 })
1001
1002 reassignIDUpdated(tree, id, updated)
1003 importTrees = append(importTrees, tree)
1004
1005 hPathsIDs[tree.HPath] = tree.ID
1006 idPaths[tree.ID] = tree.Path
1007
1008 count++
1009 if 0 == count%4 {
1010 util.PushEndlessProgress(fmt.Sprintf(Conf.language(70), fmt.Sprintf("%s", tree.HPath)))
1011 }
1012 return nil
1013 })
1014 } else { // 导入单个文件
1015 fileName := filepath.Base(localPath)
1016 if !strings.HasSuffix(fileName, ".md") && !strings.HasSuffix(fileName, ".markdown") {
1017 return errors.New(Conf.Language(79))
1018 }
1019
1020 title := strings.TrimSuffix(fileName, ".markdown")
1021 title = strings.TrimSuffix(title, ".md")
1022 targetPath := strings.TrimSuffix(toPath, ".sy")
1023 id := ast.NewNodeID()
1024 targetPath = path.Join(targetPath, id+".sy")
1025 var data []byte
1026 data, err = os.ReadFile(localPath)
1027 if err != nil {
1028 return err
1029 }
1030 tree, yfmRootID, yfmTitle, yfmUpdated := parseStdMd(data)
1031 if nil == tree {
1032 msg := fmt.Sprintf("parse tree [%s] failed", localPath)
1033 logging.LogErrorf(msg)
1034 return errors.New(msg)
1035 }
1036
1037 if "" != yfmRootID {
1038 id = yfmRootID
1039 }
1040 if "" != yfmTitle {
1041 title = yfmTitle
1042 }
1043 unescapedTitle, unescapeErr := url.PathUnescape(title)
1044 if nil == unescapeErr {
1045 title = unescapedTitle
1046 }
1047 updated := yfmUpdated
1048 fname := path.Base(targetPath)
1049 targetPath = strings.ReplaceAll(targetPath, fname, id+".sy")
1050
1051 tree.ID = id
1052 tree.Root.ID = id
1053 tree.Root.SetIALAttr("id", tree.Root.ID)
1054 tree.Root.SetIALAttr("title", title)
1055 tree.Box = boxID
1056 tree.Path = targetPath
1057 tree.HPath = path.Join(baseHPath, title)
1058 tree.Root.Spec = "1"
1059
1060 docDirLocalPath := filepath.Dir(filepath.Join(boxLocalPath, targetPath))
1061 assetDirPath := getAssetsDir(boxLocalPath, docDirLocalPath)
1062 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
1063 if !entering || (ast.NodeLinkDest != n.Type && !n.IsTextMarkType("a")) {
1064 return ast.WalkContinue
1065 }
1066
1067 var dest string
1068 if ast.NodeLinkDest == n.Type {
1069 dest = n.TokensStr()
1070 } else {
1071 dest = n.TextMarkAHref
1072 }
1073
1074 if strings.HasPrefix(dest, "data:image") && strings.Contains(dest, ";base64,") {
1075 processBase64Img(n, dest, assetDirPath)
1076 return ast.WalkContinue
1077 }
1078
1079 decodedDest := string(html.DecodeDestination([]byte(dest)))
1080 if decodedDest != dest {
1081 dest = decodedDest
1082 }
1083 absolutePath := filepath.Join(filepath.Dir(localPath), dest)
1084
1085 if ast.NodeLinkDest == n.Type {
1086 n.Tokens = []byte(dest)
1087 } else {
1088 n.TextMarkAHref = dest
1089 }
1090 if !util.IsRelativePath(dest) {
1091 return ast.WalkContinue
1092 }
1093 dest = filepath.ToSlash(dest)
1094 if "" == dest {
1095 return ast.WalkContinue
1096 }
1097
1098 if !gulu.File.IsExist(absolutePath) {
1099 return ast.WalkContinue
1100 }
1101
1102 existName := assetsDone[absolutePath]
1103 var name string
1104 if "" == existName {
1105 name = filepath.Base(absolutePath)
1106 name = util.FilterUploadFileName(name)
1107 name = util.AssetName(name, ast.NewNodeID())
1108 assetTargetPath := filepath.Join(assetDirPath, name)
1109 if err = filelock.Copy(absolutePath, assetTargetPath); err != nil {
1110 logging.LogErrorf("copy asset from [%s] to [%s] failed: %s", absolutePath, assetTargetPath, err)
1111 return ast.WalkContinue
1112 }
1113 assetsDone[absolutePath] = name
1114 } else {
1115 name = existName
1116 }
1117 if ast.NodeLinkDest == n.Type {
1118 n.Tokens = []byte("assets/" + name)
1119 } else {
1120 n.TextMarkAHref = "assets/" + name
1121 }
1122 return ast.WalkContinue
1123 })
1124
1125 reassignIDUpdated(tree, id, updated)
1126 importTrees = append(importTrees, tree)
1127 }
1128
1129 if 0 < len(importTrees) {
1130 for id, newID := range moveIDs {
1131 for _, importTree := range importTrees {
1132 importTree.ID = strings.ReplaceAll(importTree.ID, id, newID)
1133 importTree.Path = strings.ReplaceAll(importTree.Path, id, newID)
1134 }
1135 }
1136
1137 initSearchLinks()
1138 convertWikiLinksAndTags()
1139 buildBlockRefInText()
1140
1141 box := Conf.Box(boxID)
1142 for i, tree := range importTrees {
1143 indexWriteTreeIndexQueue(tree)
1144 if 0 == i%4 {
1145 util.PushEndlessProgress(fmt.Sprintf(Conf.Language(66), fmt.Sprintf("%d/%d ", i, len(importTrees))+tree.HPath))
1146 }
1147 }
1148 util.PushClearProgress()
1149
1150 importTrees = []*parse.Tree{}
1151 searchLinks = map[string]string{}
1152
1153 // 按照路径排序 Improve sort when importing markdown files https://github.com/siyuan-note/siyuan/issues/11390
1154 var hPaths []string
1155 for hPath := range hPathsIDs {
1156 hPaths = append(hPaths, hPath)
1157 }
1158 sort.Strings(hPaths)
1159 paths := map[string][]string{}
1160 for _, hPath := range hPaths {
1161 p := idPaths[hPathsIDs[hPath]]
1162 parent := path.Dir(p)
1163 for {
1164 if baseTargetPath == parent {
1165 break
1166 }
1167
1168 if ps, ok := paths[parent]; !ok {
1169 paths[parent] = []string{p}
1170 } else {
1171 ps = append(ps, p)
1172 ps = gulu.Str.RemoveDuplicatedElem(ps)
1173 paths[parent] = ps
1174 }
1175 p = parent
1176 parent = path.Dir(parent)
1177 }
1178 }
1179
1180 sortIDVals := map[string]int{}
1181 for _, ps := range paths {
1182 sortVal := 0
1183 for _, p := range ps {
1184 sortIDVals[util.GetTreeID(p)] = sortVal
1185 sortVal++
1186 }
1187 }
1188 box.setSort(sortIDVals)
1189 }
1190
1191 IncSync()
1192 debug.FreeOSMemory()
1193 return
1194}
1195
1196func parseStdMd(markdown []byte) (ret *parse.Tree, yfmRootID, yfmTitle, yfmUpdated string) {
1197 luteEngine := util.NewStdLute()
1198 luteEngine.SetYamlFrontMatter(true) // 解析 YAML Front Matter https://github.com/siyuan-note/siyuan/issues/10878
1199 ret = parse.Parse("", markdown, luteEngine.ParseOptions)
1200 if nil == ret {
1201 return
1202 }
1203 yfmRootID, yfmTitle, yfmUpdated = normalizeTree(ret)
1204 htmlBlock2Inline(ret)
1205 parse.TextMarks2Inlines(ret) // 先将 TextMark 转换为 Inlines https://github.com/siyuan-note/siyuan/issues/13056
1206 parse.NestedInlines2FlattedSpansHybrid(ret, false)
1207 return
1208}
1209
1210func processBase64Img(n *ast.Node, dest string, assetDirPath string) {
1211 base64TmpDir := filepath.Join(util.TempDir, "base64")
1212 os.MkdirAll(base64TmpDir, 0755)
1213
1214 sep := strings.Index(dest, ";base64,")
1215 str := strings.TrimSpace(dest[sep+8:])
1216 re := regexp.MustCompile(`(?i)%0A`)
1217 str = re.ReplaceAllString(str, "\n")
1218 var decodeErr error
1219 unbased, decodeErr := base64.StdEncoding.DecodeString(str)
1220 if nil != decodeErr {
1221 logging.LogErrorf("decode base64 image failed: %s", decodeErr)
1222 return
1223 }
1224 dataReader := bytes.NewReader(unbased)
1225 var img image.Image
1226 var ext string
1227 typ := dest[5:sep]
1228 switch typ {
1229 case "image/png":
1230 img, decodeErr = png.Decode(dataReader)
1231 ext = ".png"
1232 case "image/jpeg":
1233 img, decodeErr = jpeg.Decode(dataReader)
1234 ext = ".jpg"
1235 case "image/svg+xml":
1236 ext = ".svg"
1237 default:
1238 logging.LogWarnf("unsupported base64 image type [%s]", typ)
1239 return
1240 }
1241 if nil != decodeErr {
1242 logging.LogErrorf("decode base64 image failed: %s", decodeErr)
1243 return
1244 }
1245
1246 name := "image" + ext
1247 alt := n.Parent.ChildByType(ast.NodeLinkText)
1248 if nil != alt {
1249 name = alt.TokensStr() + ext
1250 }
1251 name = util.FilterUploadFileName(name)
1252 name = util.AssetName(name, ast.NewNodeID())
1253
1254 tmp := filepath.Join(base64TmpDir, name)
1255 tmpFile, openErr := os.OpenFile(tmp, os.O_RDWR|os.O_CREATE, 0644)
1256 if nil != openErr {
1257 logging.LogErrorf("open temp file [%s] failed: %s", tmp, openErr)
1258 return
1259 }
1260
1261 var encodeErr error
1262 switch typ {
1263 case "image/png":
1264 encodeErr = png.Encode(tmpFile, img)
1265 case "image/jpeg":
1266 encodeErr = jpeg.Encode(tmpFile, img, &jpeg.Options{Quality: 100})
1267 case "image/svg+xml":
1268 _, encodeErr = tmpFile.Write(unbased)
1269 }
1270 if nil != encodeErr {
1271 logging.LogErrorf("encode base64 image failed: %s", encodeErr)
1272 tmpFile.Close()
1273 return
1274 }
1275 tmpFile.Close()
1276
1277 assetTargetPath := filepath.Join(assetDirPath, name)
1278 if err := filelock.Copy(tmp, assetTargetPath); err != nil {
1279 logging.LogErrorf("copy asset from [%s] to [%s] failed: %s", tmp, assetTargetPath, err)
1280 return
1281 }
1282 n.Tokens = []byte("assets/" + name)
1283}
1284
1285func htmlBlock2Inline(tree *parse.Tree) {
1286 imgHtmlBlocks := map[*ast.Node]*html.Node{}
1287 aHtmlBlocks := map[*ast.Node]*html.Node{}
1288 var unlinks []*ast.Node
1289 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
1290 if !entering {
1291 return ast.WalkContinue
1292 }
1293
1294 if ast.NodeHTMLBlock == n.Type || (ast.NodeText == n.Type && bytes.HasPrefix(bytes.ToLower(n.Tokens), []byte("<img "))) {
1295 tokens := bytes.TrimSpace(n.Tokens)
1296 if bytes.HasPrefix(tokens, []byte("<div>")) {
1297 tokens = bytes.TrimPrefix(tokens, []byte("<div>"))
1298 }
1299 if bytes.HasSuffix(tokens, []byte("</div>")) {
1300 tokens = bytes.TrimSuffix(tokens, []byte("</div>"))
1301 }
1302 tokens = bytes.TrimSpace(tokens)
1303
1304 htmlNodes, pErr := html.ParseFragment(bytes.NewReader(tokens), &html.Node{Type: html.ElementNode})
1305 if nil != pErr {
1306 logging.LogErrorf("parse html block [%s] failed: %s", n.Tokens, pErr)
1307 return ast.WalkContinue
1308 }
1309 if 1 > len(htmlNodes) {
1310 return ast.WalkContinue
1311 }
1312
1313 for _, htmlNode := range htmlNodes {
1314 if atom.Img == htmlNode.DataAtom {
1315 imgHtmlBlocks[n] = htmlNode
1316 break
1317 }
1318 }
1319 }
1320 if ast.NodeHTMLBlock == n.Type || (ast.NodeText == n.Type && bytes.HasPrefix(bytes.ToLower(n.Tokens), []byte("<a "))) {
1321 tokens := bytes.TrimSpace(n.Tokens)
1322 if bytes.HasPrefix(tokens, []byte("<div>")) {
1323 tokens = bytes.TrimPrefix(tokens, []byte("<div>"))
1324 }
1325 if bytes.HasSuffix(tokens, []byte("</div>")) {
1326 tokens = bytes.TrimSuffix(tokens, []byte("</div>"))
1327 }
1328 tokens = bytes.TrimSpace(tokens)
1329
1330 if ast.NodeHTMLBlock != n.Type && nil != n.Next && nil != n.Next.Next {
1331 if ast.NodeText == n.Next.Next.Type && bytes.Equal(n.Next.Next.Tokens, []byte("</a>")) {
1332 tokens = append(tokens, n.Next.Tokens...)
1333 tokens = append(tokens, []byte("</a>")...)
1334 unlinks = append(unlinks, n.Next)
1335 unlinks = append(unlinks, n.Next.Next)
1336 }
1337 }
1338
1339 htmlNodes, pErr := html.ParseFragment(bytes.NewReader(tokens), &html.Node{Type: html.ElementNode})
1340 if nil != pErr {
1341 logging.LogErrorf("parse html block [%s] failed: %s", n.Tokens, pErr)
1342 return ast.WalkContinue
1343 }
1344 if 1 > len(htmlNodes) {
1345 return ast.WalkContinue
1346 }
1347
1348 for _, htmlNode := range htmlNodes {
1349 if atom.A == htmlNode.DataAtom {
1350 aHtmlBlocks[n] = htmlNode
1351 break
1352 }
1353 }
1354 }
1355 return ast.WalkContinue
1356 })
1357
1358 for n, htmlImg := range imgHtmlBlocks {
1359 src := domAttrValue(htmlImg, "src")
1360 alt := domAttrValue(htmlImg, "alt")
1361 title := domAttrValue(htmlImg, "title")
1362
1363 p := treenode.NewParagraph(n.ID)
1364 img := &ast.Node{Type: ast.NodeImage}
1365 p.AppendChild(img)
1366 img.AppendChild(&ast.Node{Type: ast.NodeBang})
1367 img.AppendChild(&ast.Node{Type: ast.NodeOpenBracket})
1368 img.AppendChild(&ast.Node{Type: ast.NodeLinkText, Tokens: []byte(alt)})
1369 img.AppendChild(&ast.Node{Type: ast.NodeCloseBracket})
1370 img.AppendChild(&ast.Node{Type: ast.NodeOpenParen})
1371 img.AppendChild(&ast.Node{Type: ast.NodeLinkDest, Tokens: []byte(src)})
1372 if "" != title {
1373 img.AppendChild(&ast.Node{Type: ast.NodeLinkSpace})
1374 img.AppendChild(&ast.Node{Type: ast.NodeLinkTitle})
1375 }
1376 img.AppendChild(&ast.Node{Type: ast.NodeCloseParen})
1377 if width := domAttrValue(htmlImg, "width"); "" != width {
1378 if util2.IsDigit(width) {
1379 width += "px"
1380 }
1381 style := "width: " + width + ";"
1382 ial := &ast.Node{Type: ast.NodeKramdownSpanIAL, Tokens: parse.IAL2Tokens([][]string{{"style", style}})}
1383 img.SetIALAttr("style", style)
1384 img.InsertAfter(ial)
1385 } else if height := domAttrValue(htmlImg, "height"); "" != height {
1386 if util2.IsDigit(height) {
1387 height += "px"
1388 }
1389 style := "height: " + height + ";"
1390 ial := &ast.Node{Type: ast.NodeKramdownSpanIAL, Tokens: parse.IAL2Tokens([][]string{{"style", style}})}
1391 img.SetIALAttr("style", style)
1392 img.InsertAfter(ial)
1393 }
1394
1395 if ast.NodeHTMLBlock == n.Type {
1396 n.InsertBefore(p)
1397 } else if ast.NodeText == n.Type {
1398 if nil != n.Parent {
1399 if n.Parent.IsContainerBlock() {
1400 n.InsertBefore(p)
1401 } else {
1402 n.InsertBefore(img)
1403 }
1404 } else {
1405 n.InsertBefore(p)
1406 }
1407 }
1408 unlinks = append(unlinks, n)
1409 }
1410
1411 for n, htmlA := range aHtmlBlocks {
1412 href := domAttrValue(htmlA, "href")
1413 title := domAttrValue(htmlA, "title")
1414 anchor := util2.DomText(htmlA)
1415
1416 p := treenode.NewParagraph(n.ID)
1417 a := &ast.Node{Type: ast.NodeLink}
1418 p.AppendChild(a)
1419 a.AppendChild(&ast.Node{Type: ast.NodeOpenBracket})
1420 a.AppendChild(&ast.Node{Type: ast.NodeLinkText, Tokens: []byte(anchor)})
1421 a.AppendChild(&ast.Node{Type: ast.NodeCloseBracket})
1422 a.AppendChild(&ast.Node{Type: ast.NodeOpenParen})
1423 a.AppendChild(&ast.Node{Type: ast.NodeLinkDest, Tokens: []byte(href)})
1424 if "" != title {
1425 a.AppendChild(&ast.Node{Type: ast.NodeLinkSpace})
1426 a.AppendChild(&ast.Node{Type: ast.NodeLinkTitle, Tokens: []byte(title)})
1427 }
1428 a.AppendChild(&ast.Node{Type: ast.NodeCloseParen})
1429
1430 if ast.NodeHTMLBlock == n.Type || (nil == n.Previous && (nil != n.Next && nil != n.Next.Next && nil == n.Next.Next.Next)) {
1431 n.InsertBefore(p)
1432 } else {
1433 n.InsertBefore(a)
1434 }
1435 unlinks = append(unlinks, n)
1436 }
1437
1438 for _, n := range unlinks {
1439 n.Unlink()
1440 }
1441 return
1442}
1443
1444func reassignIDUpdated(tree *parse.Tree, rootID, updated string) {
1445 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
1446 if !entering || "" == n.ID {
1447 return ast.WalkContinue
1448 }
1449
1450 n.ID = ast.NewNodeID()
1451 if ast.NodeDocument == n.Type && "" != rootID {
1452 n.ID = rootID
1453 }
1454
1455 n.SetIALAttr("id", n.ID)
1456 if "" != updated {
1457 n.SetIALAttr("updated", updated)
1458 if "" == rootID {
1459 n.ID = updated + "-" + gulu.Rand.String(7)
1460 n.SetIALAttr("id", n.ID)
1461 }
1462 } else {
1463 n.SetIALAttr("updated", util.TimeFromID(n.ID))
1464 }
1465 return ast.WalkContinue
1466 })
1467 tree.ID = tree.Root.ID
1468 tree.Path = path.Join(path.Dir(tree.Path), tree.ID+".sy")
1469 tree.Root.SetIALAttr("id", tree.Root.ID)
1470}
1471
1472func domAttrValue(n *html.Node, attrName string) string {
1473 if nil == n {
1474 return ""
1475 }
1476
1477 for _, attr := range n.Attr {
1478 if attr.Key == attrName {
1479 return attr.Val
1480 }
1481 }
1482 return ""
1483}
1484
1485var importTrees []*parse.Tree
1486var searchLinks = map[string]string{}
1487
1488func initSearchLinks() {
1489 for _, tree := range importTrees {
1490 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
1491 if !entering || (ast.NodeDocument != n.Type && ast.NodeHeading != n.Type) {
1492 return ast.WalkContinue
1493 }
1494
1495 nodePath := tree.HPath + "#"
1496 if ast.NodeHeading == n.Type {
1497 nodePath += n.Text()
1498 }
1499
1500 searchLinks[nodePath] = n.ID
1501 return ast.WalkContinue
1502 })
1503 }
1504}
1505
1506func convertWikiLinksAndTags() {
1507 for _, tree := range importTrees {
1508 convertWikiLinksAndTags0(tree)
1509 }
1510}
1511
1512func convertWikiLinksAndTags0(tree *parse.Tree) {
1513 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
1514 if !entering || ast.NodeText != n.Type {
1515 return ast.WalkContinue
1516 }
1517
1518 text := n.TokensStr()
1519 length := len(text)
1520 start, end := 0, length
1521 for {
1522 part := text[start:end]
1523 if idx := strings.Index(part, "]]"); 0 > idx {
1524 break
1525 } else {
1526 end = start + idx
1527 }
1528 if idx := strings.Index(part, "[["); 0 > idx {
1529 break
1530 } else {
1531 start += idx
1532 }
1533 if end <= start {
1534 break
1535 }
1536
1537 link := path.Join(path.Dir(tree.HPath), text[start+2:end]) // 统一转为绝对路径方便后续查找
1538 linkText := path.Base(link)
1539 dynamicAnchorText := true
1540 if linkParts := strings.Split(link, "|"); 1 < len(linkParts) {
1541 link = linkParts[0]
1542 linkText = linkParts[1]
1543 dynamicAnchorText = false
1544 }
1545 link, linkText = strings.TrimSpace(link), strings.TrimSpace(linkText)
1546 if !strings.Contains(link, "#") {
1547 link += "#" // 在结尾统一带上锚点方便后续查找
1548 }
1549
1550 id := searchLinkID(link)
1551 if "" == id {
1552 start, end = end, length
1553 continue
1554 }
1555
1556 linkText = strings.TrimPrefix(linkText, "/")
1557 repl := "((" + id + " '" + linkText + "'))"
1558 if !dynamicAnchorText {
1559 repl = "((" + id + " \"" + linkText + "\"))"
1560 }
1561 end += 2
1562 text = text[:start] + repl + text[end:]
1563 start, end = start+len(repl), len(text)
1564 length = end
1565 }
1566
1567 text = convertTags(text) // 导入标签语法
1568 n.Tokens = gulu.Str.ToBytes(text)
1569 return ast.WalkContinue
1570 })
1571}
1572
1573func convertTags(text string) (ret string) {
1574 if !util.MarkdownSettings.InlineTag {
1575 return text
1576 }
1577
1578 pos, i := -1, 0
1579 tokens := []byte(text)
1580 for ; i < len(tokens); i++ {
1581 if '#' == tokens[i] && (0 == i || ' ' == tokens[i-1] || (-1 < pos && '#' == tokens[pos])) {
1582 if i < len(tokens)-1 && '#' == tokens[i+1] {
1583 pos = -1
1584 continue
1585 }
1586 pos = i
1587 continue
1588 }
1589
1590 if -1 < pos && ' ' == tokens[i] {
1591 tokens = append(tokens, 0)
1592 copy(tokens[i+1:], tokens[i:])
1593 tokens[i] = '#'
1594 pos = -1
1595 i++
1596 }
1597 }
1598 if -1 < pos && pos < i {
1599 tokens = append(tokens, '#')
1600 }
1601 return string(tokens)
1602}
1603
1604// buildBlockRefInText 将文本节点进行结构化处理。
1605func buildBlockRefInText() {
1606 luteEngine := NewLute()
1607 luteEngine.SetHTMLTag2TextMark(true)
1608 for _, tree := range importTrees {
1609 tree.MergeText()
1610
1611 var unlinkTextNodes []*ast.Node
1612 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
1613 if !entering || ast.NodeText != n.Type {
1614 return ast.WalkContinue
1615 }
1616
1617 if nil == n.Tokens {
1618 return ast.WalkContinue
1619 }
1620
1621 t := parse.Inline("", n.Tokens, luteEngine.ParseOptions) // 使用行级解析
1622 parse.NestedInlines2FlattedSpans(t, false)
1623 var children []*ast.Node
1624 for c := t.Root.FirstChild.FirstChild; nil != c; c = c.Next {
1625 children = append(children, c)
1626 }
1627 for _, c := range children {
1628 n.InsertBefore(c)
1629 }
1630 unlinkTextNodes = append(unlinkTextNodes, n)
1631 return ast.WalkContinue
1632 })
1633
1634 for _, node := range unlinkTextNodes {
1635 node.Unlink()
1636 }
1637 }
1638}
1639
1640func searchLinkID(link string) (id string) {
1641 id = searchLinks[link]
1642 if "" != id {
1643 return
1644 }
1645
1646 baseName := path.Base(link)
1647 for searchLink, searchID := range searchLinks {
1648 if path.Base(searchLink) == baseName {
1649 return searchID
1650 }
1651 }
1652 return
1653}