knotserver: simplify language calculation logic #270

merged
opened by oppi.li targeting master from push-zvkrywwskknq

Follow Forgejo's language analysis more closely, and calculate language ratios from the number of bytes per language instead of the number of files.

Signed-off-by: oppiliappan me@oppi.li

Changed files
+59 -53
knotserver
types
+14 -15
knotserver/git/git.go
··· 2 3 import ( 4 "archive/tar" 5 "fmt" 6 "io" 7 "io/fs" ··· 201 } 202 203 func (g *GitRepo) FileContentN(path string, cap int64) ([]byte, error) { 204 - buf := []byte{} 205 - 206 c, err := g.r.CommitObject(g.h) 207 if err != nil { 208 return nil, fmt.Errorf("commit object: %w", err) ··· 219 } 220 221 isbin, _ := file.IsBinary() 222 - 223 - if !isbin { 224 - reader, err := file.Reader() 225 - if err != nil { 226 - return nil, err 227 - } 228 - bufReader := io.LimitReader(reader, cap) 229 - _, err = bufReader.Read(buf) 230 - if err != nil { 231 - return nil, err 232 - } 233 - return buf, nil 234 - } else { 235 return nil, ErrBinaryFile 236 } 237 } 238 239 func (g *GitRepo) FileContent(path string) (string, error) {
··· 2 3 import ( 4 "archive/tar" 5 + "bytes" 6 "fmt" 7 "io" 8 "io/fs" ··· 202 } 203 204 func (g *GitRepo) FileContentN(path string, cap int64) ([]byte, error) { 205 c, err := g.r.CommitObject(g.h) 206 if err != nil { 207 return nil, fmt.Errorf("commit object: %w", err) ··· 218 } 219 220 isbin, _ := file.IsBinary() 221 + if isbin { 222 return nil, ErrBinaryFile 223 } 224 + 225 + reader, err := file.Reader() 226 + if err != nil { 227 + return nil, err 228 + } 229 + 230 + buf := new(bytes.Buffer) 231 + if _, err = buf.ReadFrom(io.LimitReader(reader, cap)); err != nil { 232 + return nil, err 233 + } 234 + 235 + return buf.Bytes(), nil 236 } 237 238 func (g *GitRepo) FileContent(path string) (string, error) {
+44 -37
knotserver/routes.go
··· 18 "strconv" 19 "strings" 20 "sync" 21 22 securejoin "github.com/cyphar/filepath-securejoin" 23 "github.com/gliderlabs/ssh" ··· 763 } 764 765 func (h *Handle) RepoLanguages(w http.ResponseWriter, r *http.Request) { 766 - path, _ := securejoin.SecureJoin(h.c.Repo.ScanPath, didPath(r)) 767 ref := chi.URLParam(r, "ref") 768 ref, _ = url.PathUnescape(ref) 769 770 l := h.l.With("handler", "RepoLanguages") 771 772 - gr, err := git.Open(path, ref) 773 if err != nil { 774 l.Error("opening repo", "error", err.Error()) 775 notFound(w) 776 return 777 } 778 779 - languageFileCount := make(map[string]int) 780 781 - err = recurseEntireTree(r.Context(), gr, func(absPath string) { 782 - lang, safe := enry.GetLanguageByExtension(absPath) 783 - if len(lang) == 0 || !safe { 784 - content, _ := gr.FileContentN(absPath, 1024) 785 - if !safe { 786 - lang = enry.GetLanguage(absPath, content) 787 - if len(lang) == 0 { 788 - lang = "Other" 789 - } 790 - } else { 791 - lang, _ = enry.GetLanguageByContent(absPath, content) 792 - if len(lang) == 0 { 793 - lang = "Other" 794 - } 795 - } 796 } 797 798 - v, ok := languageFileCount[lang] 799 - if ok { 800 - languageFileCount[lang] = v + 1 801 - } else { 802 - languageFileCount[lang] = 1 803 } 804 - }, "") 805 if err != nil { 806 l.Error("failed to recurse file tree", "error", err.Error()) 807 writeError(w, err.Error(), http.StatusNoContent) 808 return 809 } 810 811 - resp := types.RepoLanguageResponse{Languages: languageFileCount} 812 813 writeJSON(w, resp) 814 return 815 } 816 817 - func recurseEntireTree(ctx context.Context, git *git.GitRepo, callback func(absPath string), filePath string) error { 818 - files, err := git.FileTree(ctx, filePath) 819 - if err != nil { 820 - log.Println(err) 821 - return err 822 } 823 824 - for _, file := range files { 825 - absPath := path.Join(filePath, file.Name) 826 - if !file.IsFile { 827 - return recurseEntireTree(ctx, git, callback, absPath) 828 - } 829 - callback(absPath) 830 } 831 832 - return nil 833 
} 834 835 func (h *Handle) RepoForkSync(w http.ResponseWriter, r *http.Request) {
··· 18 "strconv" 19 "strings" 20 "sync" 21 + "time" 22 23 securejoin "github.com/cyphar/filepath-securejoin" 24 "github.com/gliderlabs/ssh" ··· 764 } 765 766 func (h *Handle) RepoLanguages(w http.ResponseWriter, r *http.Request) { 767 + repoPath, _ := securejoin.SecureJoin(h.c.Repo.ScanPath, didPath(r)) 768 ref := chi.URLParam(r, "ref") 769 ref, _ = url.PathUnescape(ref) 770 771 l := h.l.With("handler", "RepoLanguages") 772 773 + gr, err := git.Open(repoPath, ref) 774 if err != nil { 775 l.Error("opening repo", "error", err.Error()) 776 notFound(w) 777 return 778 } 779 780 + sizes := make(map[string]int64) 781 782 + ctx, cancel := context.WithTimeout(r.Context(), 1*time.Second) 783 + defer cancel() 784 + 785 + err = gr.Walk(ctx, "", func(node object.TreeEntry, parent *object.Tree, root string) error { 786 + filepath := path.Join(root, node.Name) 787 + 788 + content, err := gr.FileContentN(filepath, 16*1024) // 16KB 789 + if err != nil { 790 + return nil 791 } 792 793 + if enry.IsGenerated(filepath, content) { 794 + return nil 795 + } 796 + 797 + language := analyzeLanguage(node, content) 798 + if group := enry.GetLanguageGroup(language); group != "" { 799 + language = group 800 } 801 + 802 + langType := enry.GetLanguageType(language) 803 + if langType != enry.Programming && langType != enry.Markup && langType != enry.Unknown { 804 + return nil 805 + } 806 + 807 + sz, _ := parent.Size(node.Name) 808 + sizes[language] += sz 809 + 810 + return nil 811 + }) 812 if err != nil { 813 l.Error("failed to recurse file tree", "error", err.Error()) 814 writeError(w, err.Error(), http.StatusNoContent) 815 return 816 } 817 818 + resp := types.RepoLanguageResponse{Languages: sizes} 819 820 writeJSON(w, resp) 821 return 822 } 823 824 + func analyzeLanguage(node object.TreeEntry, content []byte) string { 825 + language, ok := enry.GetLanguageByExtension(node.Name) 826 + if ok { 827 + return language 828 } 829 830 + language, ok = enry.GetLanguageByFilename(node.Name) 831 + if ok { 
832 + return language 833 } 834 835 + if len(content) == 0 { 836 + return enry.OtherLanguage 837 + } 838 + 839 + return enry.GetLanguage(node.Name, content) 840 } 841 842 func (h *Handle) RepoForkSync(w http.ResponseWriter, r *http.Request) {
+1 -1
types/repo.go
··· 117 118 type RepoLanguageResponse struct { 119 // Language: File count 120 - Languages map[string]int `json:"languages"` 121 }
··· 117 118 type RepoLanguageResponse struct { 119 // Language: File count 120 + Languages map[string]int64 `json:"languages"` 121 }