forked from tangled.org/core
Monorepo for Tangled

knotserver: simplify language calculation logic

Follow Forgejo's language analysis more closely, and calculate language
ratios based on the number of bytes rather than the number of files.

Signed-off-by: oppiliappan <me@oppi.li>

authored by oppi.li and committed by Tangled 6298f2f7 65ad4406

Changed files
+59 -53
knotserver
types
+14 -15
knotserver/git/git.go
··· 2 2 3 3 import ( 4 4 "archive/tar" 5 + "bytes" 5 6 "fmt" 6 7 "io" 7 8 "io/fs" ··· 201 202 } 202 203 203 204 func (g *GitRepo) FileContentN(path string, cap int64) ([]byte, error) { 204 - buf := []byte{} 205 - 206 205 c, err := g.r.CommitObject(g.h) 207 206 if err != nil { 208 207 return nil, fmt.Errorf("commit object: %w", err) ··· 219 218 } 220 219 221 220 isbin, _ := file.IsBinary() 222 - 223 - if !isbin { 224 - reader, err := file.Reader() 225 - if err != nil { 226 - return nil, err 227 - } 228 - bufReader := io.LimitReader(reader, cap) 229 - _, err = bufReader.Read(buf) 230 - if err != nil { 231 - return nil, err 232 - } 233 - return buf, nil 234 - } else { 221 + if isbin { 235 222 return nil, ErrBinaryFile 236 223 } 224 + 225 + reader, err := file.Reader() 226 + if err != nil { 227 + return nil, err 228 + } 229 + 230 + buf := new(bytes.Buffer) 231 + if _, err = buf.ReadFrom(io.LimitReader(reader, cap)); err != nil { 232 + return nil, err 233 + } 234 + 235 + return buf.Bytes(), nil 237 236 } 238 237 239 238 func (g *GitRepo) FileContent(path string) (string, error) {
+44 -37
knotserver/routes.go
··· 18 18 "strconv" 19 19 "strings" 20 20 "sync" 21 + "time" 21 22 22 23 securejoin "github.com/cyphar/filepath-securejoin" 23 24 "github.com/gliderlabs/ssh" ··· 763 764 } 764 765 765 766 func (h *Handle) RepoLanguages(w http.ResponseWriter, r *http.Request) { 766 - path, _ := securejoin.SecureJoin(h.c.Repo.ScanPath, didPath(r)) 767 + repoPath, _ := securejoin.SecureJoin(h.c.Repo.ScanPath, didPath(r)) 767 768 ref := chi.URLParam(r, "ref") 768 769 ref, _ = url.PathUnescape(ref) 769 770 770 771 l := h.l.With("handler", "RepoLanguages") 771 772 772 - gr, err := git.Open(path, ref) 773 + gr, err := git.Open(repoPath, ref) 773 774 if err != nil { 774 775 l.Error("opening repo", "error", err.Error()) 775 776 notFound(w) 776 777 return 777 778 } 778 779 779 - languageFileCount := make(map[string]int) 780 + sizes := make(map[string]int64) 780 781 781 - err = recurseEntireTree(r.Context(), gr, func(absPath string) { 782 - lang, safe := enry.GetLanguageByExtension(absPath) 783 - if len(lang) == 0 || !safe { 784 - content, _ := gr.FileContentN(absPath, 1024) 785 - if !safe { 786 - lang = enry.GetLanguage(absPath, content) 787 - if len(lang) == 0 { 788 - lang = "Other" 789 - } 790 - } else { 791 - lang, _ = enry.GetLanguageByContent(absPath, content) 792 - if len(lang) == 0 { 793 - lang = "Other" 794 - } 795 - } 782 + ctx, cancel := context.WithTimeout(r.Context(), 1*time.Second) 783 + defer cancel() 784 + 785 + err = gr.Walk(ctx, "", func(node object.TreeEntry, parent *object.Tree, root string) error { 786 + filepath := path.Join(root, node.Name) 787 + 788 + content, err := gr.FileContentN(filepath, 16*1024) // 16KB 789 + if err != nil { 790 + return nil 796 791 } 797 792 798 - v, ok := languageFileCount[lang] 799 - if ok { 800 - languageFileCount[lang] = v + 1 801 - } else { 802 - languageFileCount[lang] = 1 793 + if enry.IsGenerated(filepath, content) { 794 + return nil 803 795 } 804 - }, "") 796 + 797 + language := analyzeLanguage(node, content) 798 + if group := 
enry.GetLanguageGroup(language); group != "" { 799 + language = group 800 + } 801 + 802 + langType := enry.GetLanguageType(language) 803 + if langType != enry.Programming && langType != enry.Markup && langType != enry.Unknown { 804 + return nil 805 + } 806 + 807 + sz, _ := parent.Size(node.Name) 808 + sizes[language] += sz 809 + 810 + return nil 811 + }) 805 812 if err != nil { 806 813 l.Error("failed to recurse file tree", "error", err.Error()) 807 814 writeError(w, err.Error(), http.StatusNoContent) 808 815 return 809 816 } 810 817 811 - resp := types.RepoLanguageResponse{Languages: languageFileCount} 818 + resp := types.RepoLanguageResponse{Languages: sizes} 812 819 813 820 writeJSON(w, resp) 814 821 return 815 822 } 816 823 817 - func recurseEntireTree(ctx context.Context, git *git.GitRepo, callback func(absPath string), filePath string) error { 818 - files, err := git.FileTree(ctx, filePath) 819 - if err != nil { 820 - log.Println(err) 821 - return err 824 + func analyzeLanguage(node object.TreeEntry, content []byte) string { 825 + language, ok := enry.GetLanguageByExtension(node.Name) 826 + if ok { 827 + return language 822 828 } 823 829 824 - for _, file := range files { 825 - absPath := path.Join(filePath, file.Name) 826 - if !file.IsFile { 827 - return recurseEntireTree(ctx, git, callback, absPath) 828 - } 829 - callback(absPath) 830 + language, ok = enry.GetLanguageByFilename(node.Name) 831 + if ok { 832 + return language 830 833 } 831 834 832 - return nil 835 + if len(content) == 0 { 836 + return enry.OtherLanguage 837 + } 838 + 839 + return enry.GetLanguage(node.Name, content) 833 840 } 834 841 835 842 func (h *Handle) RepoForkSync(w http.ResponseWriter, r *http.Request) {
+1 -1
types/repo.go
··· 117 117 118 118 type RepoLanguageResponse struct { 119 119 // Language: File count 120 - Languages map[string]int `json:"languages"` 120 + Languages map[string]int64 `json:"languages"` 121 121 }