A privacy-first, self-hosted, fully open-source personal knowledge management software, written in TypeScript and Go. (PERSONAL FORK)
at lambda-fork/main 80 lines 1.8 kB view raw
1package model 2 3import ( 4 "path/filepath" 5 "strings" 6 "time" 7 8 "github.com/siyuan-note/logging" 9 "github.com/siyuan-note/siyuan/kernel/cache" 10 "github.com/siyuan-note/siyuan/kernel/sql" 11 "github.com/siyuan-note/siyuan/kernel/task" 12 "github.com/siyuan-note/siyuan/kernel/util" 13) 14 15func OCRAssetsJob() { 16 util.WaitForTesseractInit() 17 18 if !util.TesseractEnabled { 19 return 20 } 21 22 task.AppendTaskWithTimeout(task.OCRImage, 30*time.Second, autoOCRAssets) 23} 24 25func autoOCRAssets() { 26 if !util.TesseractEnabled { 27 return 28 } 29 30 defer logging.Recover() 31 32 assetsPath := util.GetDataAssetsAbsPath() 33 assets := getUnOCRAssetsAbsPaths() 34 if 0 < len(assets) { 35 for i, assetAbsPath := range assets { 36 text := util.GetOcrJsonText(util.Tesseract(assetAbsPath)) 37 p := strings.TrimPrefix(assetAbsPath, assetsPath) 38 p = "assets" + filepath.ToSlash(p) 39 util.SetAssetText(p, text) 40 if 7 <= i { // 一次任务中最多处理 7 张图片,防止长时间占用系统资源 41 break 42 } 43 } 44 } 45 46 util.CleanNotExistAssetsTexts() 47 48 // 刷新 OCR 结果到数据库 49 util.NodeOCRQueueLock.Lock() 50 defer util.NodeOCRQueueLock.Unlock() 51 for _, id := range util.NodeOCRQueue { 52 sql.IndexNodeQueue(id) 53 } 54 util.NodeOCRQueue = nil 55} 56 57func getUnOCRAssetsAbsPaths() (ret []string) { 58 var assetsPaths []string 59 assets := cache.GetAssets() 60 for _, asset := range assets { 61 if !util.IsTesseractExtractable(asset.Path) { 62 continue 63 } 64 assetsPaths = append(assetsPaths, asset.Path) 65 } 66 67 assetsPath := util.GetDataAssetsAbsPath() 68 for _, assetPath := range assetsPaths { 69 if util.ExistsAssetText(assetPath) { 70 continue 71 } 72 absPath := filepath.Join(assetsPath, strings.TrimPrefix(assetPath, "assets")) 73 ret = append(ret, absPath) 74 } 75 return 76} 77 78func FlushAssetsTextsJob() { 79 util.SaveAssetsTexts() 80}