// A privacy-first, self-hosted, fully open-source personal knowledge management system, written in TypeScript and Go. (PERSONAL FORK)
// at lambda-fork/main, 1653 lines, 48 kB (view raw)
1// SiYuan - Refactor your thinking 2// Copyright (c) 2020-present, b3log.org 3// 4// This program is free software: you can redistribute it and/or modify 5// it under the terms of the GNU Affero General Public License as published by 6// the Free Software Foundation, either version 3 of the License, or 7// (at your option) any later version. 8// 9// This program is distributed in the hope that it will be useful, 10// but WITHOUT ANY WARRANTY; without even the implied warranty of 11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12// GNU Affero General Public License for more details. 13// 14// You should have received a copy of the GNU Affero General Public License 15// along with this program. If not, see <https://www.gnu.org/licenses/>. 16 17package model 18 19import ( 20 "bytes" 21 "encoding/base64" 22 "encoding/json" 23 "errors" 24 "fmt" 25 "image" 26 "image/jpeg" 27 "image/png" 28 "io" 29 "io/fs" 30 "net/url" 31 "os" 32 "path" 33 "path/filepath" 34 "regexp" 35 "runtime/debug" 36 "sort" 37 "strings" 38 39 "github.com/88250/gulu" 40 "github.com/88250/lute" 41 "github.com/88250/lute/ast" 42 "github.com/88250/lute/html" 43 "github.com/88250/lute/html/atom" 44 "github.com/88250/lute/parse" 45 "github.com/88250/lute/render" 46 util2 "github.com/88250/lute/util" 47 "github.com/siyuan-note/dataparser" 48 "github.com/siyuan-note/filelock" 49 "github.com/siyuan-note/logging" 50 "github.com/siyuan-note/riff" 51 "github.com/siyuan-note/siyuan/kernel/av" 52 "github.com/siyuan-note/siyuan/kernel/filesys" 53 "github.com/siyuan-note/siyuan/kernel/sql" 54 "github.com/siyuan-note/siyuan/kernel/task" 55 "github.com/siyuan-note/siyuan/kernel/treenode" 56 "github.com/siyuan-note/siyuan/kernel/util" 57) 58 59func HTML2Markdown(htmlStr string, luteEngine *lute.Lute) (markdown string, withMath bool, err error) { 60 tree, withMath := HTML2Tree(htmlStr, luteEngine) 61 62 var formatted []byte 63 renderer := render.NewFormatRenderer(tree, luteEngine.RenderOptions) 64 for 
nodeType, rendererFunc := range luteEngine.HTML2MdRendererFuncs { 65 renderer.ExtRendererFuncs[nodeType] = rendererFunc 66 } 67 formatted = renderer.Render() 68 markdown = gulu.Str.FromBytes(formatted) 69 return 70} 71 72func HTML2Tree(htmlStr string, luteEngine *lute.Lute) (tree *parse.Tree, withMath bool) { 73 htmlStr = gulu.Str.RemovePUA(htmlStr) 74 assetDirPath := filepath.Join(util.DataDir, "assets") 75 tree = luteEngine.HTML2Tree(htmlStr) 76 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus { 77 if !entering { 78 return ast.WalkContinue 79 } 80 81 if ast.NodeText == n.Type { 82 if n.ParentIs(ast.NodeTableCell) { 83 n.Tokens = bytes.ReplaceAll(n.Tokens, []byte("\\|"), []byte("|")) 84 n.Tokens = bytes.ReplaceAll(n.Tokens, []byte("|"), []byte("\\|")) 85 n.Tokens = bytes.ReplaceAll(n.Tokens, []byte("\\<br /\\>"), []byte("<br />")) 86 } 87 } 88 89 if ast.NodeInlineMath == n.Type { 90 withMath = true 91 return ast.WalkContinue 92 } 93 94 if ast.NodeLinkDest != n.Type { 95 return ast.WalkContinue 96 } 97 98 dest := n.TokensStr() 99 if strings.HasPrefix(dest, "data:image") && strings.Contains(dest, ";base64,") { 100 processBase64Img(n, dest, assetDirPath) 101 return ast.WalkContinue 102 } 103 return ast.WalkContinue 104 }) 105 return 106} 107 108func ImportSY(zipPath, boxID, toPath string) (err error) { 109 util.PushEndlessProgress(Conf.Language(73)) 110 defer util.ClearPushProgress(100) 111 112 lockSync() 113 defer unlockSync() 114 115 baseName := filepath.Base(zipPath) 116 ext := filepath.Ext(baseName) 117 baseName = strings.TrimSuffix(baseName, ext) 118 unzipPath := filepath.Join(filepath.Dir(zipPath), baseName+"-"+gulu.Rand.String(7)) 119 err = gulu.Zip.Unzip(zipPath, unzipPath) 120 if err != nil { 121 return 122 } 123 defer os.RemoveAll(unzipPath) 124 125 var syPaths []string 126 filelock.Walk(unzipPath, func(path string, d fs.DirEntry, err error) error { 127 if err != nil { 128 return err 129 } 130 if d == nil { 131 return nil 132 } 133 if 
!d.IsDir() && strings.HasSuffix(d.Name(), ".sy") { 134 syPaths = append(syPaths, path) 135 } 136 return nil 137 }) 138 139 entries, err := os.ReadDir(unzipPath) 140 if err != nil { 141 logging.LogErrorf("read unzip dir [%s] failed: %s", unzipPath, err) 142 return 143 } 144 if 1 != len(entries) { 145 logging.LogErrorf("invalid .sy.zip [%v]", entries) 146 return errors.New(Conf.Language(199)) 147 } 148 unzipRootPath := filepath.Join(unzipPath, entries[0].Name()) 149 name := filepath.Base(unzipRootPath) 150 if strings.HasPrefix(name, "data-20") && len("data-20230321175442") == len(name) { 151 logging.LogErrorf("invalid .sy.zip [unzipRootPath=%s, baseName=%s]", unzipRootPath, name) 152 return errors.New(Conf.Language(199)) 153 } 154 155 luteEngine := util.NewLute() 156 blockIDs := map[string]string{} 157 trees := map[string]*parse.Tree{} 158 159 // 重新生成块 ID 160 for i, syPath := range syPaths { 161 data, readErr := os.ReadFile(syPath) 162 if nil != readErr { 163 logging.LogErrorf("read .sy [%s] failed: %s", syPath, readErr) 164 err = readErr 165 return 166 } 167 tree, _, parseErr := dataparser.ParseJSON(data, luteEngine.ParseOptions) 168 if nil != parseErr { 169 logging.LogErrorf("parse .sy [%s] failed: %s", syPath, parseErr) 170 err = parseErr 171 return 172 } 173 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus { 174 if !entering || "" == n.ID { 175 return ast.WalkContinue 176 } 177 178 // 新 ID 保留时间部分,仅修改随机值,避免时间变化导致更新时间早于创建时间 179 // Keep original creation time when importing .sy.zip https://github.com/siyuan-note/siyuan/issues/9923 180 newNodeID := util.TimeFromID(n.ID) + "-" + util.RandString(7) 181 blockIDs[n.ID] = newNodeID 182 n.ID = newNodeID 183 n.SetIALAttr("id", newNodeID) 184 185 if icon := n.IALAttr("icon"); "" != icon { 186 // XSS through emoji name https://github.com/siyuan-note/siyuan/issues/15034 187 icon = util.FilterUploadEmojiFileName(icon) 188 n.SetIALAttr("icon", icon) 189 } 190 191 return ast.WalkContinue 192 }) 193 tree.ID = 
tree.Root.ID 194 tree.Path = filepath.ToSlash(strings.TrimPrefix(syPath, unzipRootPath)) 195 trees[tree.ID] = tree 196 util.PushEndlessProgress(Conf.language(73) + " " + fmt.Sprintf(Conf.language(70), fmt.Sprintf("%d/%d", i+1, len(syPaths)))) 197 } 198 199 // 引用和嵌入指向重新生成的块 ID 200 for _, tree := range trees { 201 util.PushEndlessProgress(Conf.language(73) + " " + fmt.Sprintf(Conf.language(70), tree.Root.IALAttr("title"))) 202 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus { 203 if !entering { 204 return ast.WalkContinue 205 } 206 207 if treenode.IsBlockRef(n) { 208 defID, _, _ := treenode.GetBlockRef(n) 209 newDefID := blockIDs[defID] 210 if "" != newDefID { 211 n.TextMarkBlockRefID = newDefID 212 } 213 } else if ast.NodeTextMark == n.Type && n.IsTextMarkType("a") && strings.HasPrefix(n.TextMarkAHref, "siyuan://blocks/") { 214 // Block hyperlinks do not point to regenerated block IDs when importing .sy.zip https://github.com/siyuan-note/siyuan/issues/9083 215 defID := strings.TrimPrefix(n.TextMarkAHref, "siyuan://blocks/") 216 newDefID := blockIDs[defID] 217 if "" != newDefID { 218 n.TextMarkAHref = "siyuan://blocks/" + newDefID 219 } 220 } else if ast.NodeBlockQueryEmbedScript == n.Type { 221 for oldID, newID := range blockIDs { 222 // 导入 `.sy.zip` 后查询嵌入块失效 https://github.com/siyuan-note/siyuan/issues/5316 223 n.Tokens = bytes.ReplaceAll(n.Tokens, []byte(oldID), []byte(newID)) 224 } 225 } 226 return ast.WalkContinue 227 }) 228 } 229 230 var replacements []string 231 for oldID, newID := range blockIDs { 232 replacements = append(replacements, oldID, newID) 233 } 234 blockIDReplacer := strings.NewReplacer(replacements...) 
235 236 // 将关联的数据库文件移动到 data/storage/av/ 下 237 storage := filepath.Join(unzipRootPath, "storage") 238 storageAvDir := filepath.Join(storage, "av") 239 avIDs := map[string]string{} 240 renameAvPaths := map[string]string{} 241 if gulu.File.IsExist(storageAvDir) { 242 // 重新生成数据库数据 243 filelock.Walk(storageAvDir, func(path string, d fs.DirEntry, err error) error { 244 if err != nil { 245 return err 246 } 247 if d == nil { 248 return nil 249 } 250 if !strings.HasSuffix(path, ".json") || !ast.IsNodeIDPattern(strings.TrimSuffix(d.Name(), ".json")) { 251 return nil 252 } 253 254 // 重命名数据库 255 newAvID := ast.NewNodeID() 256 oldAvID := strings.TrimSuffix(d.Name(), ".json") 257 newPath := filepath.Join(filepath.Dir(path), newAvID+".json") 258 renameAvPaths[path] = newPath 259 avIDs[oldAvID] = newAvID 260 return nil 261 }) 262 263 // 重命名数据库文件 264 for oldPath, newPath := range renameAvPaths { 265 data, readErr := os.ReadFile(oldPath) 266 if nil != readErr { 267 logging.LogErrorf("read av file [%s] failed: %s", oldPath, readErr) 268 return nil 269 } 270 271 // 将数据库文件中的 ID 替换为新的 ID 272 newData := data 273 for oldAvID, newAvID := range avIDs { 274 newData = bytes.ReplaceAll(newData, []byte(oldAvID), []byte(newAvID)) 275 } 276 newData = []byte(blockIDReplacer.Replace(string(newData))) 277 if !bytes.Equal(data, newData) { 278 if writeErr := os.WriteFile(oldPath, newData, 0644); nil != writeErr { 279 logging.LogErrorf("write av file [%s] failed: %s", oldPath, writeErr) 280 return nil 281 } 282 } 283 284 if err = os.Rename(oldPath, newPath); err != nil { 285 logging.LogErrorf("rename av file from [%s] to [%s] failed: %s", oldPath, newPath, err) 286 return 287 } 288 } 289 290 targetStorageAvDir := filepath.Join(util.DataDir, "storage", "av") 291 if copyErr := filelock.Copy(storageAvDir, targetStorageAvDir); nil != copyErr { 292 logging.LogErrorf("copy storage av dir from [%s] to [%s] failed: %s", storageAvDir, targetStorageAvDir, copyErr) 293 } 294 295 // 重新指向数据库属性值 296 for _, tree := 
range trees { 297 util.PushEndlessProgress(Conf.language(73) + " " + fmt.Sprintf(Conf.language(70), tree.Root.IALAttr("title"))) 298 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus { 299 if !entering || "" == n.ID { 300 return ast.WalkContinue 301 } 302 303 ial := parse.IAL2Map(n.KramdownIAL) 304 for k, v := range ial { 305 if strings.HasPrefix(k, av.NodeAttrNameAvs) { 306 newKey, newVal := k, v 307 for oldAvID, newAvID := range avIDs { 308 newKey = strings.ReplaceAll(newKey, oldAvID, newAvID) 309 newVal = strings.ReplaceAll(newVal, oldAvID, newAvID) 310 } 311 n.RemoveIALAttr(k) 312 n.SetIALAttr(newKey, newVal) 313 } 314 } 315 316 if ast.NodeAttributeView == n.Type { 317 n.AttributeViewID = avIDs[n.AttributeViewID] 318 } 319 return ast.WalkContinue 320 }) 321 322 // 关联数据库和块 323 avNodes := tree.Root.ChildrenByType(ast.NodeAttributeView) 324 av.BatchUpsertBlockRel(avNodes) 325 } 326 327 // 如果数据库中绑定的块不在导入的文档中,则需要单独更新这些绑定块的属性 328 var attrViewIDs []string 329 for _, avID := range avIDs { 330 attrViewIDs = append(attrViewIDs, avID) 331 } 332 updateBoundBlockAvsAttribute(attrViewIDs) 333 334 // 插入关联关系 https://github.com/siyuan-note/siyuan/issues/11628 335 relationAvs := map[string]string{} 336 for _, avID := range avIDs { 337 attrView, _ := av.ParseAttributeView(avID) 338 if nil == attrView { 339 continue 340 } 341 342 for _, keyValues := range attrView.KeyValues { 343 if nil != keyValues.Key && av.KeyTypeRelation == keyValues.Key.Type && nil != keyValues.Key.Relation { 344 relationAvs[avID] = keyValues.Key.Relation.AvID 345 } 346 } 347 } 348 349 for srcAvID, destAvID := range relationAvs { 350 av.UpsertAvBackRel(srcAvID, destAvID) 351 } 352 } 353 354 // 将关联的闪卡数据合并到默认卡包 data/storage/riff/20230218211946-2kw8jgx 中 355 storageRiffDir := filepath.Join(storage, "riff") 356 if gulu.File.IsExist(storageRiffDir) { 357 deckToImport, loadErr := riff.LoadDeck(storageRiffDir, builtinDeckID, Conf.Flashcard.RequestRetention, Conf.Flashcard.MaximumInterval, 
Conf.Flashcard.Weights) 358 if nil != loadErr { 359 logging.LogErrorf("load deck [%s] failed: %s", name, loadErr) 360 } else { 361 deck := Decks[builtinDeckID] 362 if nil == deck { 363 var createErr error 364 deck, createErr = createDeck0("Built-in Deck", builtinDeckID) 365 if nil == createErr { 366 Decks[deck.ID] = deck 367 } 368 } 369 370 bIDs := deckToImport.GetBlockIDs() 371 cards := deckToImport.GetCardsByBlockIDs(bIDs) 372 for _, card := range cards { 373 deck.AddCard(ast.NewNodeID(), blockIDs[card.BlockID()]) 374 } 375 376 if 0 < len(cards) { 377 if saveErr := deck.Save(); nil != saveErr { 378 logging.LogErrorf("save deck [%s] failed: %s", name, saveErr) 379 } 380 } 381 } 382 } 383 384 // storage 文件夹已在上方处理,所以这里删除源 storage 文件夹,避免后面被拷贝到导入目录下 targetDir 385 if removeErr := os.RemoveAll(storage); nil != removeErr { 386 logging.LogErrorf("remove temp storage av dir failed: %s", removeErr) 387 } 388 389 if 1 > len(avIDs) { // 如果本次没有导入数据库,则清理掉文档中的数据库属性 https://github.com/siyuan-note/siyuan/issues/13011 390 for _, tree := range trees { 391 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus { 392 if !entering || !n.IsBlock() { 393 return ast.WalkContinue 394 } 395 396 n.RemoveIALAttr(av.NodeAttrNameAvs) 397 return ast.WalkContinue 398 }) 399 } 400 } 401 402 // 写回 .sy 403 for _, tree := range trees { 404 util.PushEndlessProgress(Conf.language(73) + " " + fmt.Sprintf(Conf.language(70), tree.Root.IALAttr("title"))) 405 syPath := filepath.Join(unzipRootPath, tree.Path) 406 if "" == tree.Root.Spec { 407 parse.NestedInlines2FlattedSpans(tree, false) 408 tree.Root.Spec = "1" 409 } 410 renderer := render.NewJSONRenderer(tree, luteEngine.RenderOptions) 411 data := renderer.Render() 412 413 if !util.UseSingleLineSave { 414 buf := bytes.Buffer{} 415 buf.Grow(1024 * 1024 * 2) 416 if err = json.Indent(&buf, data, "", "\t"); err != nil { 417 return 418 } 419 data = buf.Bytes() 420 } 421 422 if err = os.WriteFile(syPath, data, 0644); err != nil { 423 
logging.LogErrorf("write .sy [%s] failed: %s", syPath, err) 424 return 425 } 426 newSyPath := filepath.Join(filepath.Dir(syPath), tree.ID+".sy") 427 if err = filelock.Rename(syPath, newSyPath); err != nil { 428 logging.LogErrorf("rename .sy from [%s] to [%s] failed: %s", syPath, newSyPath, err) 429 return 430 } 431 } 432 433 // 合并 sort.json 434 fullSortIDs := map[string]int{} 435 sortIDs := map[string]int{} 436 var sortData []byte 437 var sortErr error 438 sortPath := filepath.Join(unzipRootPath, ".siyuan", "sort.json") 439 if filelock.IsExist(sortPath) { 440 sortData, sortErr = filelock.ReadFile(sortPath) 441 if nil != sortErr { 442 logging.LogErrorf("read import sort conf failed: %s", sortErr) 443 } 444 445 if sortErr = gulu.JSON.UnmarshalJSON(sortData, &sortIDs); nil != sortErr { 446 logging.LogErrorf("unmarshal sort conf failed: %s", sortErr) 447 } 448 449 boxSortPath := filepath.Join(util.DataDir, boxID, ".siyuan", "sort.json") 450 if filelock.IsExist(boxSortPath) { 451 sortData, sortErr = filelock.ReadFile(boxSortPath) 452 if nil != sortErr { 453 logging.LogErrorf("read box sort conf failed: %s", sortErr) 454 } 455 456 if sortErr = gulu.JSON.UnmarshalJSON(sortData, &fullSortIDs); nil != sortErr { 457 logging.LogErrorf("unmarshal box sort conf failed: %s", sortErr) 458 } 459 } 460 461 for oldID, sort := range sortIDs { 462 if newID := blockIDs[oldID]; "" != newID { 463 fullSortIDs[newID] = sort 464 } 465 } 466 467 sortData, sortErr = gulu.JSON.MarshalJSON(fullSortIDs) 468 if nil != sortErr { 469 logging.LogErrorf("marshal box full sort conf failed: %s", sortErr) 470 } else { 471 sortErr = filelock.WriteFile(boxSortPath, sortData) 472 if nil != sortErr { 473 logging.LogErrorf("write box full sort conf failed: %s", sortErr) 474 } 475 } 476 if removeErr := os.RemoveAll(sortPath); nil != removeErr { 477 logging.LogErrorf("remove temp sort conf failed: %s", removeErr) 478 } 479 } 480 481 // 重命名文件路径 482 renamePaths := map[string]string{} 483 
filelock.Walk(unzipRootPath, func(path string, d fs.DirEntry, err error) error { 484 if err != nil { 485 return err 486 } 487 if d == nil { 488 return nil 489 } 490 if d.IsDir() && ast.IsNodeIDPattern(d.Name()) { 491 renamePaths[path] = path 492 } 493 return nil 494 }) 495 for p, _ := range renamePaths { 496 originalPath := p 497 p = strings.TrimPrefix(p, unzipRootPath) 498 p = filepath.ToSlash(p) 499 parts := strings.Split(p, "/") 500 buf := bytes.Buffer{} 501 buf.WriteString("/") 502 for i, part := range parts { 503 if "" == part { 504 continue 505 } 506 newNodeID := blockIDs[part] 507 if "" != newNodeID { 508 buf.WriteString(newNodeID) 509 } else { 510 buf.WriteString(part) 511 } 512 if i < len(parts)-1 { 513 buf.WriteString("/") 514 } 515 } 516 newPath := buf.String() 517 renamePaths[originalPath] = filepath.Join(unzipRootPath, newPath) 518 } 519 520 var oldPaths []string 521 for oldPath, _ := range renamePaths { 522 oldPaths = append(oldPaths, oldPath) 523 } 524 sort.Slice(oldPaths, func(i, j int) bool { 525 return strings.Count(oldPaths[i], string(os.PathSeparator)) < strings.Count(oldPaths[j], string(os.PathSeparator)) 526 }) 527 for i, oldPath := range oldPaths { 528 newPath := renamePaths[oldPath] 529 if err = filelock.Rename(oldPath, newPath); err != nil { 530 logging.LogErrorf("rename path from [%s] to [%s] failed: %s", oldPath, renamePaths[oldPath], err) 531 return errors.New("rename path failed") 532 } 533 534 delete(renamePaths, oldPath) 535 var toRemoves []string 536 newRenamedPaths := map[string]string{} 537 for oldP, newP := range renamePaths { 538 if strings.HasPrefix(oldP, oldPath) { 539 renamedOldP := strings.Replace(oldP, oldPath, newPath, 1) 540 newRenamedPaths[renamedOldP] = newP 541 toRemoves = append(toRemoves, oldPath) 542 } 543 } 544 for _, toRemove := range toRemoves { 545 delete(renamePaths, toRemove) 546 } 547 for oldP, newP := range newRenamedPaths { 548 renamePaths[oldP] = newP 549 } 550 for j := i + 1; j < len(oldPaths); j++ { 551 
if strings.HasPrefix(oldPaths[j], oldPath) { 552 renamedOldP := strings.Replace(oldPaths[j], oldPath, newPath, 1) 553 oldPaths[j] = renamedOldP 554 } 555 } 556 } 557 558 // 将包含的资源文件统一移动到 data/assets/ 下 559 var assetsDirs []string 560 filelock.Walk(unzipRootPath, func(path string, d fs.DirEntry, err error) error { 561 if err != nil { 562 return err 563 } 564 if d == nil { 565 return nil 566 } 567 if strings.Contains(path, "assets") && d.IsDir() { 568 assetsDirs = append(assetsDirs, path) 569 } 570 return nil 571 }) 572 dataAssets := filepath.Join(util.DataDir, "assets") 573 for _, assets := range assetsDirs { 574 if gulu.File.IsDir(assets) { 575 if err = filelock.Copy(assets, dataAssets); err != nil { 576 logging.LogErrorf("copy assets from [%s] to [%s] failed: %s", assets, dataAssets, err) 577 return 578 } 579 } 580 os.RemoveAll(assets) 581 } 582 583 // 将包含的自定义表情统一移动到 data/emojis/ 下 584 unzipRootEmojisPath := filepath.Join(unzipRootPath, "emojis") 585 filelock.Walk(unzipRootEmojisPath, func(path string, d fs.DirEntry, err error) error { 586 if err != nil { 587 return err 588 } 589 if d == nil { 590 return nil 591 } 592 if !util.IsValidUploadFileName(d.Name()) { 593 emojiFullName := path 594 fullPathFilteredName := filepath.Join(filepath.Dir(path), util.FilterUploadEmojiFileName(d.Name())) 595 // XSS through emoji name https://github.com/siyuan-note/siyuan/issues/15034 596 logging.LogWarnf("renaming invalid custom emoji file [%s] to [%s]", d.Name(), fullPathFilteredName) 597 if removeErr := filelock.Rename(emojiFullName, fullPathFilteredName); nil != removeErr { 598 logging.LogErrorf("renaming invalid custom emoji file to [%s] failed: %s", fullPathFilteredName, removeErr) 599 } 600 } 601 return nil 602 }) 603 var emojiDirs []string 604 filelock.Walk(unzipRootPath, func(path string, d fs.DirEntry, err error) error { 605 if err != nil { 606 return err 607 } 608 if d == nil { 609 return nil 610 } 611 if strings.Contains(path, "emojis") && d.IsDir() { 612 emojiDirs = 
append(emojiDirs, path) 613 } 614 return nil 615 }) 616 dataEmojis := filepath.Join(util.DataDir, "emojis") 617 for _, emojis := range emojiDirs { 618 if gulu.File.IsDir(emojis) { 619 if err = filelock.Copy(emojis, dataEmojis); err != nil { 620 logging.LogErrorf("copy emojis from [%s] to [%s] failed: %s", emojis, dataEmojis, err) 621 return 622 } 623 } 624 os.RemoveAll(emojis) 625 } 626 627 var baseTargetPath string 628 if "/" == toPath { 629 baseTargetPath = "/" 630 } else { 631 block := treenode.GetBlockTreeRootByPath(boxID, toPath) 632 if nil == block { 633 logging.LogErrorf("not found block by path [%s]", toPath) 634 return nil 635 } 636 baseTargetPath = strings.TrimSuffix(block.Path, ".sy") 637 } 638 639 targetDir := filepath.Join(util.DataDir, boxID, baseTargetPath) 640 if err = os.MkdirAll(targetDir, 0755); err != nil { 641 return 642 } 643 644 var treePaths []string 645 filelock.Walk(unzipRootPath, func(path string, d fs.DirEntry, err error) error { 646 if err != nil { 647 return err 648 } 649 if d == nil { 650 return nil 651 } 652 if d.IsDir() { 653 if strings.HasPrefix(d.Name(), ".") { 654 return filepath.SkipDir 655 } 656 return nil 657 } 658 659 if !strings.HasSuffix(d.Name(), ".sy") { 660 return nil 661 } 662 663 p := strings.TrimPrefix(path, unzipRootPath) 664 p = filepath.ToSlash(p) 665 treePaths = append(treePaths, p) 666 return nil 667 }) 668 669 if err = filelock.Copy(unzipRootPath, targetDir); err != nil { 670 logging.LogErrorf("copy data dir from [%s] to [%s] failed: %s", unzipRootPath, util.DataDir, err) 671 err = errors.New("copy data failed") 672 return 673 } 674 675 boxAbsPath := filepath.Join(util.DataDir, boxID) 676 for _, treePath := range treePaths { 677 absPath := filepath.Join(targetDir, treePath) 678 p := strings.TrimPrefix(absPath, boxAbsPath) 679 p = filepath.ToSlash(p) 680 tree, err := filesys.LoadTree(boxID, p, luteEngine) 681 if err != nil { 682 logging.LogErrorf("load tree [%s] failed: %s", treePath, err) 683 continue 684 } 685 
686 treenode.IndexBlockTree(tree) 687 sql.IndexTreeQueue(tree) 688 util.PushEndlessProgress(Conf.language(73) + " " + fmt.Sprintf(Conf.language(70), tree.Root.IALAttr("title"))) 689 } 690 691 IncSync() 692 693 task.AppendTask(task.UpdateIDs, util.PushUpdateIDs, blockIDs) 694 return 695} 696 697func ImportData(zipPath string) (err error) { 698 util.PushEndlessProgress(Conf.Language(73)) 699 defer util.ClearPushProgress(100) 700 701 lockSync() 702 defer unlockSync() 703 704 logging.LogInfof("import data from [%s]", zipPath) 705 baseName := filepath.Base(zipPath) 706 ext := filepath.Ext(baseName) 707 baseName = strings.TrimSuffix(baseName, ext) 708 unzipPath := filepath.Join(filepath.Dir(zipPath), baseName) 709 err = gulu.Zip.Unzip(zipPath, unzipPath) 710 if err != nil { 711 return 712 } 713 defer os.RemoveAll(unzipPath) 714 715 files, err := filepath.Glob(filepath.Join(unzipPath, "*/*.sy")) 716 if err != nil { 717 logging.LogErrorf("check data.zip failed: %s", err) 718 return errors.New("check data.zip failed") 719 } 720 if 0 < len(files) { 721 return errors.New(Conf.Language(198)) 722 } 723 dirs, err := os.ReadDir(unzipPath) 724 if err != nil { 725 logging.LogErrorf("check data.zip failed: %s", err) 726 return errors.New("check data.zip failed") 727 } 728 if 1 != len(dirs) { 729 return errors.New(Conf.Language(198)) 730 } 731 732 tmpDataPath := filepath.Join(unzipPath, dirs[0].Name()) 733 tmpDataEmojisPath := filepath.Join(tmpDataPath, "emojis") 734 filelock.Walk(tmpDataEmojisPath, func(path string, d fs.DirEntry, err error) error { 735 if err != nil { 736 return err 737 } 738 if d == nil { 739 return nil 740 } 741 if !util.IsValidUploadFileName(d.Name()) { 742 emojiFullName := path 743 fullPathFilteredName := filepath.Join(filepath.Dir(path), util.FilterUploadEmojiFileName(d.Name())) 744 // XSS through emoji name https://github.com/siyuan-note/siyuan/issues/15034 745 logging.LogWarnf("renaming invalid custom emoji file [%s] to [%s]", d.Name(), fullPathFilteredName) 
746 if removeErr := filelock.Rename(emojiFullName, fullPathFilteredName); nil != removeErr { 747 logging.LogErrorf("renaming invalid custom emoji file to [%s] failed: %s", fullPathFilteredName, removeErr) 748 } 749 } 750 return nil 751 }) 752 if err = filelock.Copy(tmpDataPath, util.DataDir); err != nil { 753 logging.LogErrorf("copy data dir from [%s] to [%s] failed: %s", tmpDataPath, util.DataDir, err) 754 err = errors.New("copy data failed") 755 return 756 } 757 758 logging.LogInfof("import data from [%s] done", zipPath) 759 IncSync() 760 FullReindex() 761 return 762} 763 764func ImportFromLocalPath(boxID, localPath string, toPath string) (err error) { 765 util.PushEndlessProgress(Conf.Language(73)) 766 defer func() { 767 util.PushClearProgress() 768 769 if e := recover(); nil != e { 770 stack := debug.Stack() 771 msg := fmt.Sprintf("PANIC RECOVERED: %v\n\t%s\n", e, stack) 772 logging.LogErrorf("import from local path failed: %s", msg) 773 err = errors.New("import from local path failed, please check kernel log for details") 774 } 775 }() 776 777 lockSync() 778 defer unlockSync() 779 780 FlushTxQueue() 781 782 var baseHPath, baseTargetPath, boxLocalPath string 783 if "/" == toPath { 784 baseHPath = "/" 785 baseTargetPath = "/" 786 } else { 787 block := treenode.GetBlockTreeRootByPath(boxID, toPath) 788 if nil == block { 789 logging.LogErrorf("not found block by path [%s]", toPath) 790 return nil 791 } 792 baseHPath = block.HPath 793 baseTargetPath = strings.TrimSuffix(block.Path, ".sy") 794 } 795 boxLocalPath = filepath.Join(util.DataDir, boxID) 796 797 hPathsIDs := map[string]string{} 798 idPaths := map[string]string{} 799 moveIDs := map[string]string{} 800 assetsDone := map[string]string{} 801 if gulu.File.IsDir(localPath) { // 导入文件夹 802 // 收集所有资源文件 803 assets := map[string]string{} 804 filelock.Walk(localPath, func(currentPath string, d fs.DirEntry, err error) error { 805 if err != nil { 806 return err 807 } 808 if d == nil { 809 return nil 810 } 811 if 
localPath == currentPath { 812 return nil 813 } 814 if strings.HasPrefix(d.Name(), ".") { 815 if d.IsDir() { 816 return filepath.SkipDir 817 } 818 return nil 819 } 820 821 if !strings.HasSuffix(d.Name(), ".md") && !strings.HasSuffix(d.Name(), ".markdown") { 822 assets[currentPath] = currentPath 823 return nil 824 } 825 return nil 826 }) 827 828 targetPaths := map[string]string{} 829 count := 0 830 // md 转换 sy 831 filelock.Walk(localPath, func(currentPath string, d fs.DirEntry, err error) error { 832 if err != nil { 833 return err 834 } 835 if d == nil { 836 return nil 837 } 838 if strings.HasPrefix(d.Name(), ".") { 839 if d.IsDir() { 840 return filepath.SkipDir 841 } 842 return nil 843 } 844 845 var tree *parse.Tree 846 var ext string 847 title := d.Name() 848 if !d.IsDir() { 849 ext = util.Ext(d.Name()) 850 title = strings.TrimSuffix(d.Name(), ext) 851 } 852 id := ast.NewNodeID() 853 854 curRelPath := filepath.ToSlash(strings.TrimPrefix(currentPath, localPath)) 855 targetPath := path.Join(baseTargetPath, id) 856 hPath := path.Join(baseHPath, filepath.Base(localPath), filepath.ToSlash(strings.TrimPrefix(currentPath, localPath))) 857 hPath = strings.TrimSuffix(hPath, ext) 858 if "" == curRelPath { 859 curRelPath = "/" 860 hPath = "/" + title 861 } else { 862 dirPath := targetPaths[path.Dir(curRelPath)] 863 targetPath = path.Join(dirPath, id) 864 } 865 866 targetPath = strings.ReplaceAll(targetPath, ".sy/", "/") 867 targetPath += ".sy" 868 if _, ok := targetPaths[curRelPath]; !ok { 869 targetPaths[curRelPath] = targetPath 870 } else { 871 targetPath = targetPaths[curRelPath] 872 id = util.GetTreeID(targetPath) 873 } 874 875 if d.IsDir() { 876 if subMdFiles := util.GetFilePathsByExts(currentPath, []string{".md", ".markdown"}); 1 > len(subMdFiles) { 877 // 如果该文件夹中不包含 Markdown 文件则不处理 https://github.com/siyuan-note/siyuan/issues/11567 878 return nil 879 } 880 881 // 如果当前文件夹路径下包含同名的 Markdown 文件,则不创建空文档 https://github.com/siyuan-note/siyuan/issues/13149 882 if 
gulu.File.IsExist(currentPath+".md") || gulu.File.IsExist(currentPath+".markdown") { 883 targetPaths[curRelPath+".md"] = targetPath 884 return nil 885 } 886 887 tree = treenode.NewTree(boxID, targetPath, hPath, title) 888 importTrees = append(importTrees, tree) 889 return nil 890 } 891 892 if !strings.HasSuffix(d.Name(), ".md") && !strings.HasSuffix(d.Name(), ".markdown") { 893 return nil 894 } 895 896 data, readErr := os.ReadFile(currentPath) 897 if nil != readErr { 898 err = readErr 899 return io.EOF 900 } 901 902 tree, yfmRootID, yfmTitle, yfmUpdated := parseStdMd(data) 903 if nil == tree { 904 logging.LogErrorf("parse tree [%s] failed", currentPath) 905 return nil 906 } 907 908 if "" != yfmRootID { 909 moveIDs[id] = yfmRootID 910 id = yfmRootID 911 } 912 if "" != yfmTitle { 913 title = yfmTitle 914 } 915 unescapedTitle, unescapeErr := url.PathUnescape(title) 916 if nil == unescapeErr { 917 title = unescapedTitle 918 } 919 hPath = path.Join(path.Dir(hPath), title) 920 updated := yfmUpdated 921 fname := path.Base(targetPath) 922 targetPath = strings.ReplaceAll(targetPath, fname, id+".sy") 923 targetPaths[curRelPath] = targetPath 924 925 tree.ID = id 926 tree.Root.ID = id 927 tree.Root.SetIALAttr("id", tree.Root.ID) 928 tree.Root.SetIALAttr("title", title) 929 tree.Box = boxID 930 targetPath = path.Join(path.Dir(targetPath), tree.Root.ID+".sy") 931 tree.Path = targetPath 932 targetPaths[curRelPath] = targetPath 933 tree.HPath = hPath 934 tree.Root.Spec = "1" 935 936 docDirLocalPath := filepath.Dir(filepath.Join(boxLocalPath, targetPath)) 937 assetDirPath := getAssetsDir(boxLocalPath, docDirLocalPath) 938 currentDir := filepath.Dir(currentPath) 939 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus { 940 if !entering || (ast.NodeLinkDest != n.Type && !n.IsTextMarkType("a")) { 941 return ast.WalkContinue 942 } 943 944 var dest string 945 if ast.NodeLinkDest == n.Type { 946 dest = n.TokensStr() 947 } else { 948 dest = n.TextMarkAHref 949 } 950 951 if 
strings.HasPrefix(dest, "data:image") && strings.Contains(dest, ";base64,") { 952 processBase64Img(n, dest, assetDirPath) 953 return ast.WalkContinue 954 } 955 956 decodedDest := string(html.DecodeDestination([]byte(dest))) 957 if decodedDest != dest { 958 dest = decodedDest 959 } 960 absolutePath := filepath.Join(currentDir, dest) 961 962 if ast.NodeLinkDest == n.Type { 963 n.Tokens = []byte(dest) 964 } else { 965 n.TextMarkAHref = dest 966 } 967 if !util.IsRelativePath(dest) { 968 return ast.WalkContinue 969 } 970 dest = filepath.ToSlash(dest) 971 if "" == dest { 972 return ast.WalkContinue 973 } 974 975 if !gulu.File.IsExist(absolutePath) { 976 return ast.WalkContinue 977 } 978 979 existName := assetsDone[absolutePath] 980 var name string 981 if "" == existName { 982 name = filepath.Base(absolutePath) 983 name = util.FilterUploadFileName(name) 984 name = util.AssetName(name, ast.NewNodeID()) 985 assetTargetPath := filepath.Join(assetDirPath, name) 986 if err = filelock.Copy(absolutePath, assetTargetPath); err != nil { 987 logging.LogErrorf("copy asset from [%s] to [%s] failed: %s", absolutePath, assetTargetPath, err) 988 return ast.WalkContinue 989 } 990 assetsDone[absolutePath] = name 991 } else { 992 name = existName 993 } 994 if ast.NodeLinkDest == n.Type { 995 n.Tokens = []byte("assets/" + name) 996 } else { 997 n.TextMarkAHref = "assets/" + name 998 } 999 return ast.WalkContinue 1000 }) 1001 1002 reassignIDUpdated(tree, id, updated) 1003 importTrees = append(importTrees, tree) 1004 1005 hPathsIDs[tree.HPath] = tree.ID 1006 idPaths[tree.ID] = tree.Path 1007 1008 count++ 1009 if 0 == count%4 { 1010 util.PushEndlessProgress(fmt.Sprintf(Conf.language(70), fmt.Sprintf("%s", tree.HPath))) 1011 } 1012 return nil 1013 }) 1014 } else { // 导入单个文件 1015 fileName := filepath.Base(localPath) 1016 if !strings.HasSuffix(fileName, ".md") && !strings.HasSuffix(fileName, ".markdown") { 1017 return errors.New(Conf.Language(79)) 1018 } 1019 1020 title := 
strings.TrimSuffix(fileName, ".markdown") 1021 title = strings.TrimSuffix(title, ".md") 1022 targetPath := strings.TrimSuffix(toPath, ".sy") 1023 id := ast.NewNodeID() 1024 targetPath = path.Join(targetPath, id+".sy") 1025 var data []byte 1026 data, err = os.ReadFile(localPath) 1027 if err != nil { 1028 return err 1029 } 1030 tree, yfmRootID, yfmTitle, yfmUpdated := parseStdMd(data) 1031 if nil == tree { 1032 msg := fmt.Sprintf("parse tree [%s] failed", localPath) 1033 logging.LogErrorf(msg) 1034 return errors.New(msg) 1035 } 1036 1037 if "" != yfmRootID { 1038 id = yfmRootID 1039 } 1040 if "" != yfmTitle { 1041 title = yfmTitle 1042 } 1043 unescapedTitle, unescapeErr := url.PathUnescape(title) 1044 if nil == unescapeErr { 1045 title = unescapedTitle 1046 } 1047 updated := yfmUpdated 1048 fname := path.Base(targetPath) 1049 targetPath = strings.ReplaceAll(targetPath, fname, id+".sy") 1050 1051 tree.ID = id 1052 tree.Root.ID = id 1053 tree.Root.SetIALAttr("id", tree.Root.ID) 1054 tree.Root.SetIALAttr("title", title) 1055 tree.Box = boxID 1056 tree.Path = targetPath 1057 tree.HPath = path.Join(baseHPath, title) 1058 tree.Root.Spec = "1" 1059 1060 docDirLocalPath := filepath.Dir(filepath.Join(boxLocalPath, targetPath)) 1061 assetDirPath := getAssetsDir(boxLocalPath, docDirLocalPath) 1062 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus { 1063 if !entering || (ast.NodeLinkDest != n.Type && !n.IsTextMarkType("a")) { 1064 return ast.WalkContinue 1065 } 1066 1067 var dest string 1068 if ast.NodeLinkDest == n.Type { 1069 dest = n.TokensStr() 1070 } else { 1071 dest = n.TextMarkAHref 1072 } 1073 1074 if strings.HasPrefix(dest, "data:image") && strings.Contains(dest, ";base64,") { 1075 processBase64Img(n, dest, assetDirPath) 1076 return ast.WalkContinue 1077 } 1078 1079 decodedDest := string(html.DecodeDestination([]byte(dest))) 1080 if decodedDest != dest { 1081 dest = decodedDest 1082 } 1083 absolutePath := filepath.Join(filepath.Dir(localPath), dest) 
1084 1085 if ast.NodeLinkDest == n.Type { 1086 n.Tokens = []byte(dest) 1087 } else { 1088 n.TextMarkAHref = dest 1089 } 1090 if !util.IsRelativePath(dest) { 1091 return ast.WalkContinue 1092 } 1093 dest = filepath.ToSlash(dest) 1094 if "" == dest { 1095 return ast.WalkContinue 1096 } 1097 1098 if !gulu.File.IsExist(absolutePath) { 1099 return ast.WalkContinue 1100 } 1101 1102 existName := assetsDone[absolutePath] 1103 var name string 1104 if "" == existName { 1105 name = filepath.Base(absolutePath) 1106 name = util.FilterUploadFileName(name) 1107 name = util.AssetName(name, ast.NewNodeID()) 1108 assetTargetPath := filepath.Join(assetDirPath, name) 1109 if err = filelock.Copy(absolutePath, assetTargetPath); err != nil { 1110 logging.LogErrorf("copy asset from [%s] to [%s] failed: %s", absolutePath, assetTargetPath, err) 1111 return ast.WalkContinue 1112 } 1113 assetsDone[absolutePath] = name 1114 } else { 1115 name = existName 1116 } 1117 if ast.NodeLinkDest == n.Type { 1118 n.Tokens = []byte("assets/" + name) 1119 } else { 1120 n.TextMarkAHref = "assets/" + name 1121 } 1122 return ast.WalkContinue 1123 }) 1124 1125 reassignIDUpdated(tree, id, updated) 1126 importTrees = append(importTrees, tree) 1127 } 1128 1129 if 0 < len(importTrees) { 1130 for id, newID := range moveIDs { 1131 for _, importTree := range importTrees { 1132 importTree.ID = strings.ReplaceAll(importTree.ID, id, newID) 1133 importTree.Path = strings.ReplaceAll(importTree.Path, id, newID) 1134 } 1135 } 1136 1137 initSearchLinks() 1138 convertWikiLinksAndTags() 1139 buildBlockRefInText() 1140 1141 box := Conf.Box(boxID) 1142 for i, tree := range importTrees { 1143 indexWriteTreeIndexQueue(tree) 1144 if 0 == i%4 { 1145 util.PushEndlessProgress(fmt.Sprintf(Conf.Language(66), fmt.Sprintf("%d/%d ", i, len(importTrees))+tree.HPath)) 1146 } 1147 } 1148 util.PushClearProgress() 1149 1150 importTrees = []*parse.Tree{} 1151 searchLinks = map[string]string{} 1152 1153 // 按照路径排序 Improve sort when importing 
markdown files https://github.com/siyuan-note/siyuan/issues/11390 1154 var hPaths []string 1155 for hPath := range hPathsIDs { 1156 hPaths = append(hPaths, hPath) 1157 } 1158 sort.Strings(hPaths) 1159 paths := map[string][]string{} 1160 for _, hPath := range hPaths { 1161 p := idPaths[hPathsIDs[hPath]] 1162 parent := path.Dir(p) 1163 for { 1164 if baseTargetPath == parent { 1165 break 1166 } 1167 1168 if ps, ok := paths[parent]; !ok { 1169 paths[parent] = []string{p} 1170 } else { 1171 ps = append(ps, p) 1172 ps = gulu.Str.RemoveDuplicatedElem(ps) 1173 paths[parent] = ps 1174 } 1175 p = parent 1176 parent = path.Dir(parent) 1177 } 1178 } 1179 1180 sortIDVals := map[string]int{} 1181 for _, ps := range paths { 1182 sortVal := 0 1183 for _, p := range ps { 1184 sortIDVals[util.GetTreeID(p)] = sortVal 1185 sortVal++ 1186 } 1187 } 1188 box.setSort(sortIDVals) 1189 } 1190 1191 IncSync() 1192 debug.FreeOSMemory() 1193 return 1194} 1195 1196func parseStdMd(markdown []byte) (ret *parse.Tree, yfmRootID, yfmTitle, yfmUpdated string) { 1197 luteEngine := util.NewStdLute() 1198 luteEngine.SetYamlFrontMatter(true) // 解析 YAML Front Matter https://github.com/siyuan-note/siyuan/issues/10878 1199 ret = parse.Parse("", markdown, luteEngine.ParseOptions) 1200 if nil == ret { 1201 return 1202 } 1203 yfmRootID, yfmTitle, yfmUpdated = normalizeTree(ret) 1204 htmlBlock2Inline(ret) 1205 parse.TextMarks2Inlines(ret) // 先将 TextMark 转换为 Inlines https://github.com/siyuan-note/siyuan/issues/13056 1206 parse.NestedInlines2FlattedSpansHybrid(ret, false) 1207 return 1208} 1209 1210func processBase64Img(n *ast.Node, dest string, assetDirPath string) { 1211 base64TmpDir := filepath.Join(util.TempDir, "base64") 1212 os.MkdirAll(base64TmpDir, 0755) 1213 1214 sep := strings.Index(dest, ";base64,") 1215 str := strings.TrimSpace(dest[sep+8:]) 1216 re := regexp.MustCompile(`(?i)%0A`) 1217 str = re.ReplaceAllString(str, "\n") 1218 var decodeErr error 1219 unbased, decodeErr := 
base64.StdEncoding.DecodeString(str) 1220 if nil != decodeErr { 1221 logging.LogErrorf("decode base64 image failed: %s", decodeErr) 1222 return 1223 } 1224 dataReader := bytes.NewReader(unbased) 1225 var img image.Image 1226 var ext string 1227 typ := dest[5:sep] 1228 switch typ { 1229 case "image/png": 1230 img, decodeErr = png.Decode(dataReader) 1231 ext = ".png" 1232 case "image/jpeg": 1233 img, decodeErr = jpeg.Decode(dataReader) 1234 ext = ".jpg" 1235 case "image/svg+xml": 1236 ext = ".svg" 1237 default: 1238 logging.LogWarnf("unsupported base64 image type [%s]", typ) 1239 return 1240 } 1241 if nil != decodeErr { 1242 logging.LogErrorf("decode base64 image failed: %s", decodeErr) 1243 return 1244 } 1245 1246 name := "image" + ext 1247 alt := n.Parent.ChildByType(ast.NodeLinkText) 1248 if nil != alt { 1249 name = alt.TokensStr() + ext 1250 } 1251 name = util.FilterUploadFileName(name) 1252 name = util.AssetName(name, ast.NewNodeID()) 1253 1254 tmp := filepath.Join(base64TmpDir, name) 1255 tmpFile, openErr := os.OpenFile(tmp, os.O_RDWR|os.O_CREATE, 0644) 1256 if nil != openErr { 1257 logging.LogErrorf("open temp file [%s] failed: %s", tmp, openErr) 1258 return 1259 } 1260 1261 var encodeErr error 1262 switch typ { 1263 case "image/png": 1264 encodeErr = png.Encode(tmpFile, img) 1265 case "image/jpeg": 1266 encodeErr = jpeg.Encode(tmpFile, img, &jpeg.Options{Quality: 100}) 1267 case "image/svg+xml": 1268 _, encodeErr = tmpFile.Write(unbased) 1269 } 1270 if nil != encodeErr { 1271 logging.LogErrorf("encode base64 image failed: %s", encodeErr) 1272 tmpFile.Close() 1273 return 1274 } 1275 tmpFile.Close() 1276 1277 assetTargetPath := filepath.Join(assetDirPath, name) 1278 if err := filelock.Copy(tmp, assetTargetPath); err != nil { 1279 logging.LogErrorf("copy asset from [%s] to [%s] failed: %s", tmp, assetTargetPath, err) 1280 return 1281 } 1282 n.Tokens = []byte("assets/" + name) 1283} 1284 1285func htmlBlock2Inline(tree *parse.Tree) { 1286 imgHtmlBlocks := 
map[*ast.Node]*html.Node{} 1287 aHtmlBlocks := map[*ast.Node]*html.Node{} 1288 var unlinks []*ast.Node 1289 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus { 1290 if !entering { 1291 return ast.WalkContinue 1292 } 1293 1294 if ast.NodeHTMLBlock == n.Type || (ast.NodeText == n.Type && bytes.HasPrefix(bytes.ToLower(n.Tokens), []byte("<img "))) { 1295 tokens := bytes.TrimSpace(n.Tokens) 1296 if bytes.HasPrefix(tokens, []byte("<div>")) { 1297 tokens = bytes.TrimPrefix(tokens, []byte("<div>")) 1298 } 1299 if bytes.HasSuffix(tokens, []byte("</div>")) { 1300 tokens = bytes.TrimSuffix(tokens, []byte("</div>")) 1301 } 1302 tokens = bytes.TrimSpace(tokens) 1303 1304 htmlNodes, pErr := html.ParseFragment(bytes.NewReader(tokens), &html.Node{Type: html.ElementNode}) 1305 if nil != pErr { 1306 logging.LogErrorf("parse html block [%s] failed: %s", n.Tokens, pErr) 1307 return ast.WalkContinue 1308 } 1309 if 1 > len(htmlNodes) { 1310 return ast.WalkContinue 1311 } 1312 1313 for _, htmlNode := range htmlNodes { 1314 if atom.Img == htmlNode.DataAtom { 1315 imgHtmlBlocks[n] = htmlNode 1316 break 1317 } 1318 } 1319 } 1320 if ast.NodeHTMLBlock == n.Type || (ast.NodeText == n.Type && bytes.HasPrefix(bytes.ToLower(n.Tokens), []byte("<a "))) { 1321 tokens := bytes.TrimSpace(n.Tokens) 1322 if bytes.HasPrefix(tokens, []byte("<div>")) { 1323 tokens = bytes.TrimPrefix(tokens, []byte("<div>")) 1324 } 1325 if bytes.HasSuffix(tokens, []byte("</div>")) { 1326 tokens = bytes.TrimSuffix(tokens, []byte("</div>")) 1327 } 1328 tokens = bytes.TrimSpace(tokens) 1329 1330 if ast.NodeHTMLBlock != n.Type && nil != n.Next && nil != n.Next.Next { 1331 if ast.NodeText == n.Next.Next.Type && bytes.Equal(n.Next.Next.Tokens, []byte("</a>")) { 1332 tokens = append(tokens, n.Next.Tokens...) 1333 tokens = append(tokens, []byte("</a>")...) 
1334 unlinks = append(unlinks, n.Next) 1335 unlinks = append(unlinks, n.Next.Next) 1336 } 1337 } 1338 1339 htmlNodes, pErr := html.ParseFragment(bytes.NewReader(tokens), &html.Node{Type: html.ElementNode}) 1340 if nil != pErr { 1341 logging.LogErrorf("parse html block [%s] failed: %s", n.Tokens, pErr) 1342 return ast.WalkContinue 1343 } 1344 if 1 > len(htmlNodes) { 1345 return ast.WalkContinue 1346 } 1347 1348 for _, htmlNode := range htmlNodes { 1349 if atom.A == htmlNode.DataAtom { 1350 aHtmlBlocks[n] = htmlNode 1351 break 1352 } 1353 } 1354 } 1355 return ast.WalkContinue 1356 }) 1357 1358 for n, htmlImg := range imgHtmlBlocks { 1359 src := domAttrValue(htmlImg, "src") 1360 alt := domAttrValue(htmlImg, "alt") 1361 title := domAttrValue(htmlImg, "title") 1362 1363 p := treenode.NewParagraph(n.ID) 1364 img := &ast.Node{Type: ast.NodeImage} 1365 p.AppendChild(img) 1366 img.AppendChild(&ast.Node{Type: ast.NodeBang}) 1367 img.AppendChild(&ast.Node{Type: ast.NodeOpenBracket}) 1368 img.AppendChild(&ast.Node{Type: ast.NodeLinkText, Tokens: []byte(alt)}) 1369 img.AppendChild(&ast.Node{Type: ast.NodeCloseBracket}) 1370 img.AppendChild(&ast.Node{Type: ast.NodeOpenParen}) 1371 img.AppendChild(&ast.Node{Type: ast.NodeLinkDest, Tokens: []byte(src)}) 1372 if "" != title { 1373 img.AppendChild(&ast.Node{Type: ast.NodeLinkSpace}) 1374 img.AppendChild(&ast.Node{Type: ast.NodeLinkTitle}) 1375 } 1376 img.AppendChild(&ast.Node{Type: ast.NodeCloseParen}) 1377 if width := domAttrValue(htmlImg, "width"); "" != width { 1378 if util2.IsDigit(width) { 1379 width += "px" 1380 } 1381 style := "width: " + width + ";" 1382 ial := &ast.Node{Type: ast.NodeKramdownSpanIAL, Tokens: parse.IAL2Tokens([][]string{{"style", style}})} 1383 img.SetIALAttr("style", style) 1384 img.InsertAfter(ial) 1385 } else if height := domAttrValue(htmlImg, "height"); "" != height { 1386 if util2.IsDigit(height) { 1387 height += "px" 1388 } 1389 style := "height: " + height + ";" 1390 ial := &ast.Node{Type: 
ast.NodeKramdownSpanIAL, Tokens: parse.IAL2Tokens([][]string{{"style", style}})} 1391 img.SetIALAttr("style", style) 1392 img.InsertAfter(ial) 1393 } 1394 1395 if ast.NodeHTMLBlock == n.Type { 1396 n.InsertBefore(p) 1397 } else if ast.NodeText == n.Type { 1398 if nil != n.Parent { 1399 if n.Parent.IsContainerBlock() { 1400 n.InsertBefore(p) 1401 } else { 1402 n.InsertBefore(img) 1403 } 1404 } else { 1405 n.InsertBefore(p) 1406 } 1407 } 1408 unlinks = append(unlinks, n) 1409 } 1410 1411 for n, htmlA := range aHtmlBlocks { 1412 href := domAttrValue(htmlA, "href") 1413 title := domAttrValue(htmlA, "title") 1414 anchor := util2.DomText(htmlA) 1415 1416 p := treenode.NewParagraph(n.ID) 1417 a := &ast.Node{Type: ast.NodeLink} 1418 p.AppendChild(a) 1419 a.AppendChild(&ast.Node{Type: ast.NodeOpenBracket}) 1420 a.AppendChild(&ast.Node{Type: ast.NodeLinkText, Tokens: []byte(anchor)}) 1421 a.AppendChild(&ast.Node{Type: ast.NodeCloseBracket}) 1422 a.AppendChild(&ast.Node{Type: ast.NodeOpenParen}) 1423 a.AppendChild(&ast.Node{Type: ast.NodeLinkDest, Tokens: []byte(href)}) 1424 if "" != title { 1425 a.AppendChild(&ast.Node{Type: ast.NodeLinkSpace}) 1426 a.AppendChild(&ast.Node{Type: ast.NodeLinkTitle, Tokens: []byte(title)}) 1427 } 1428 a.AppendChild(&ast.Node{Type: ast.NodeCloseParen}) 1429 1430 if ast.NodeHTMLBlock == n.Type || (nil == n.Previous && (nil != n.Next && nil != n.Next.Next && nil == n.Next.Next.Next)) { 1431 n.InsertBefore(p) 1432 } else { 1433 n.InsertBefore(a) 1434 } 1435 unlinks = append(unlinks, n) 1436 } 1437 1438 for _, n := range unlinks { 1439 n.Unlink() 1440 } 1441 return 1442} 1443 1444func reassignIDUpdated(tree *parse.Tree, rootID, updated string) { 1445 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus { 1446 if !entering || "" == n.ID { 1447 return ast.WalkContinue 1448 } 1449 1450 n.ID = ast.NewNodeID() 1451 if ast.NodeDocument == n.Type && "" != rootID { 1452 n.ID = rootID 1453 } 1454 1455 n.SetIALAttr("id", n.ID) 1456 if "" != 
updated { 1457 n.SetIALAttr("updated", updated) 1458 if "" == rootID { 1459 n.ID = updated + "-" + gulu.Rand.String(7) 1460 n.SetIALAttr("id", n.ID) 1461 } 1462 } else { 1463 n.SetIALAttr("updated", util.TimeFromID(n.ID)) 1464 } 1465 return ast.WalkContinue 1466 }) 1467 tree.ID = tree.Root.ID 1468 tree.Path = path.Join(path.Dir(tree.Path), tree.ID+".sy") 1469 tree.Root.SetIALAttr("id", tree.Root.ID) 1470} 1471 1472func domAttrValue(n *html.Node, attrName string) string { 1473 if nil == n { 1474 return "" 1475 } 1476 1477 for _, attr := range n.Attr { 1478 if attr.Key == attrName { 1479 return attr.Val 1480 } 1481 } 1482 return "" 1483} 1484 1485var importTrees []*parse.Tree 1486var searchLinks = map[string]string{} 1487 1488func initSearchLinks() { 1489 for _, tree := range importTrees { 1490 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus { 1491 if !entering || (ast.NodeDocument != n.Type && ast.NodeHeading != n.Type) { 1492 return ast.WalkContinue 1493 } 1494 1495 nodePath := tree.HPath + "#" 1496 if ast.NodeHeading == n.Type { 1497 nodePath += n.Text() 1498 } 1499 1500 searchLinks[nodePath] = n.ID 1501 return ast.WalkContinue 1502 }) 1503 } 1504} 1505 1506func convertWikiLinksAndTags() { 1507 for _, tree := range importTrees { 1508 convertWikiLinksAndTags0(tree) 1509 } 1510} 1511 1512func convertWikiLinksAndTags0(tree *parse.Tree) { 1513 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus { 1514 if !entering || ast.NodeText != n.Type { 1515 return ast.WalkContinue 1516 } 1517 1518 text := n.TokensStr() 1519 length := len(text) 1520 start, end := 0, length 1521 for { 1522 part := text[start:end] 1523 if idx := strings.Index(part, "]]"); 0 > idx { 1524 break 1525 } else { 1526 end = start + idx 1527 } 1528 if idx := strings.Index(part, "[["); 0 > idx { 1529 break 1530 } else { 1531 start += idx 1532 } 1533 if end <= start { 1534 break 1535 } 1536 1537 link := path.Join(path.Dir(tree.HPath), text[start+2:end]) // 统一转为绝对路径方便后续查找 
1538 linkText := path.Base(link) 1539 dynamicAnchorText := true 1540 if linkParts := strings.Split(link, "|"); 1 < len(linkParts) { 1541 link = linkParts[0] 1542 linkText = linkParts[1] 1543 dynamicAnchorText = false 1544 } 1545 link, linkText = strings.TrimSpace(link), strings.TrimSpace(linkText) 1546 if !strings.Contains(link, "#") { 1547 link += "#" // 在结尾统一带上锚点方便后续查找 1548 } 1549 1550 id := searchLinkID(link) 1551 if "" == id { 1552 start, end = end, length 1553 continue 1554 } 1555 1556 linkText = strings.TrimPrefix(linkText, "/") 1557 repl := "((" + id + " '" + linkText + "'))" 1558 if !dynamicAnchorText { 1559 repl = "((" + id + " \"" + linkText + "\"))" 1560 } 1561 end += 2 1562 text = text[:start] + repl + text[end:] 1563 start, end = start+len(repl), len(text) 1564 length = end 1565 } 1566 1567 text = convertTags(text) // 导入标签语法 1568 n.Tokens = gulu.Str.ToBytes(text) 1569 return ast.WalkContinue 1570 }) 1571} 1572 1573func convertTags(text string) (ret string) { 1574 if !util.MarkdownSettings.InlineTag { 1575 return text 1576 } 1577 1578 pos, i := -1, 0 1579 tokens := []byte(text) 1580 for ; i < len(tokens); i++ { 1581 if '#' == tokens[i] && (0 == i || ' ' == tokens[i-1] || (-1 < pos && '#' == tokens[pos])) { 1582 if i < len(tokens)-1 && '#' == tokens[i+1] { 1583 pos = -1 1584 continue 1585 } 1586 pos = i 1587 continue 1588 } 1589 1590 if -1 < pos && ' ' == tokens[i] { 1591 tokens = append(tokens, 0) 1592 copy(tokens[i+1:], tokens[i:]) 1593 tokens[i] = '#' 1594 pos = -1 1595 i++ 1596 } 1597 } 1598 if -1 < pos && pos < i { 1599 tokens = append(tokens, '#') 1600 } 1601 return string(tokens) 1602} 1603 1604// buildBlockRefInText 将文本节点进行结构化处理。 1605func buildBlockRefInText() { 1606 luteEngine := NewLute() 1607 luteEngine.SetHTMLTag2TextMark(true) 1608 for _, tree := range importTrees { 1609 tree.MergeText() 1610 1611 var unlinkTextNodes []*ast.Node 1612 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus { 1613 if !entering || ast.NodeText != 
n.Type { 1614 return ast.WalkContinue 1615 } 1616 1617 if nil == n.Tokens { 1618 return ast.WalkContinue 1619 } 1620 1621 t := parse.Inline("", n.Tokens, luteEngine.ParseOptions) // 使用行级解析 1622 parse.NestedInlines2FlattedSpans(t, false) 1623 var children []*ast.Node 1624 for c := t.Root.FirstChild.FirstChild; nil != c; c = c.Next { 1625 children = append(children, c) 1626 } 1627 for _, c := range children { 1628 n.InsertBefore(c) 1629 } 1630 unlinkTextNodes = append(unlinkTextNodes, n) 1631 return ast.WalkContinue 1632 }) 1633 1634 for _, node := range unlinkTextNodes { 1635 node.Unlink() 1636 } 1637 } 1638} 1639 1640func searchLinkID(link string) (id string) { 1641 id = searchLinks[link] 1642 if "" != id { 1643 return 1644 } 1645 1646 baseName := path.Base(link) 1647 for searchLink, searchID := range searchLinks { 1648 if path.Base(searchLink) == baseName { 1649 return searchID 1650 } 1651 } 1652 return 1653}