appview: use repo_language table to access language info #317

merged
opened by oppi.li targeting master from push-snktzuwttuvu

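The appview now reads language info from the repo_language table. If that lookup fails or returns nothing, it falls back to calling RepoLanguages on the knot and caches the result at the appview.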

Signed-off-by: oppiliappan <me@oppi.li>

Changed files
+100 -37
appview
repo
state
+44 -25
appview/repo/index.go
··· 123 } 124 } 125 126 - languageInfo, err := getLanguageInfo(f, signedClient, ref) 127 if err != nil { 128 log.Printf("failed to compute language percentages: %s", err) 129 // non-fatal ··· 153 Languages: languageInfo, 154 Pipelines: pipelines, 155 }) 156 - return 157 } 158 159 - func getLanguageInfo( 160 f *reporesolver.ResolvedRepo, 161 signedClient *knotclient.SignedClient, 162 ref string, 163 ) ([]types.RepoLanguageDetails, error) { 164 - repoLanguages, err := signedClient.RepoLanguages(f.OwnerDid(), f.RepoName, ref) 165 - if err != nil { 166 - return []types.RepoLanguageDetails{}, err 167 - } 168 - if repoLanguages == nil { 169 - repoLanguages = &types.RepoLanguageResponse{Languages: make(map[string]int64)} 170 } 171 172 - var totalSize int64 173 - for _, fileSize := range repoLanguages.Languages { 174 - totalSize += fileSize 175 } 176 177 var languageStats []types.RepoLanguageDetails 178 - var otherPercentage float32 = 0 179 - 180 - for lang, size := range repoLanguages.Languages { 181 - percentage := (float32(size) / float32(totalSize)) * 100 182 - 183 - if percentage <= 0.5 { 184 - otherPercentage += percentage 185 - continue 186 - } 187 - 188 - color := enry.GetColor(lang) 189 - 190 - languageStats = append(languageStats, types.RepoLanguageDetails{Name: lang, Percentage: percentage, Color: color}) 191 } 192 193 sort.Slice(languageStats, func(i, j int) bool {
··· 123 } 124 } 125 126 + languageInfo, err := rp.getLanguageInfo(f, signedClient, ref) 127 if err != nil { 128 log.Printf("failed to compute language percentages: %s", err) 129 // non-fatal ··· 153 Languages: languageInfo, 154 Pipelines: pipelines, 155 }) 156 } 157 158 + func (rp *Repo) getLanguageInfo( 159 f *reporesolver.ResolvedRepo, 160 signedClient *knotclient.SignedClient, 161 ref string, 162 ) ([]types.RepoLanguageDetails, error) { 163 + // first attempt to fetch from db 164 + langs, err := db.GetRepoLanguages( 165 + rp.db, 166 + db.FilterEq("repo_at", f.RepoAt), 167 + db.FilterEq("ref", ref), 168 + ) 169 + 170 + if err != nil || langs == nil { 171 + // non-fatal, fetch langs from ks 172 + ls, err := signedClient.RepoLanguages(f.OwnerDid(), f.RepoName, ref) 173 + if err != nil { 174 + return nil, err 175 + } 176 + if ls == nil { 177 + return nil, nil 178 + } 179 + for l, s := range ls.Languages { 180 + langs = append(langs, db.RepoLanguage{ 181 + RepoAt: f.RepoAt, 182 + Ref: ref, 183 + Language: l, 184 + Bytes: s, 185 + }) 186 + } 187 + 188 + // update appview's cache 189 + err = db.InsertRepoLanguages(rp.db, langs) 190 + if err != nil { 191 + // non-fatal 192 + log.Println("failed to cache lang results", err) 193 + } 194 } 195 196 + var total int64 197 + for _, l := range langs { 198 + total += l.Bytes 199 } 200 201 var languageStats []types.RepoLanguageDetails 202 + for _, l := range langs { 203 + percentage := float32(l.Bytes) / float32(total) * 100 204 + color := enry.GetColor(l.Language) 205 + languageStats = append(languageStats, types.RepoLanguageDetails{ 206 + Name: l.Language, 207 + Percentage: percentage, 208 + Color: color, 209 + }) 210 } 211 212 sort.Slice(languageStats, func(i, j int) bool {
+56 -12
appview/state/knotstream.go
··· 3 import ( 4 "context" 5 "encoding/json" 6 "fmt" 7 "slices" 8 "time" ··· 18 "tangled.sh/tangled.sh/core/workflow" 19 20 "github.com/bluesky-social/indigo/atproto/syntax" 21 "github.com/posthog/posthog-go" 22 ) 23 ··· 39 40 cfg := ec.ConsumerConfig{ 41 Sources: srcs, 42 - ProcessFunc: knotIngester(ctx, d, enforcer, posthog, c.Core.Dev), 43 RetryInterval: c.Knotstream.RetryInterval, 44 MaxRetryInterval: c.Knotstream.MaxRetryInterval, 45 ConnectionTimeout: c.Knotstream.ConnectionTimeout, ··· 53 return ec.NewConsumer(cfg), nil 54 } 55 56 - func knotIngester(ctx context.Context, d *db.DB, enforcer *rbac.Enforcer, posthog posthog.Client, dev bool) ec.ProcessFunc { 57 return func(ctx context.Context, source ec.Source, msg ec.Message) error { 58 switch msg.Nsid { 59 case tangled.GitRefUpdateNSID: ··· 81 return fmt.Errorf("%s does not belong to %s, something is fishy", record.CommitterDid, source.Key()) 82 } 83 84 knownEmails, err := db.GetAllEmails(d, record.CommitterDid) 85 if err != nil { 86 return err 87 } 88 count := 0 89 for _, ke := range knownEmails { 90 if record.Meta == nil { ··· 108 Date: time.Now(), 109 Count: count, 110 } 111 - if err := db.AddPunch(d, punch); err != nil { 112 - return err 113 } 114 115 - if !dev { 116 - err = pc.Enqueue(posthog.Capture{ 117 - DistinctId: record.CommitterDid, 118 - Event: "git_ref_update", 119 - }) 120 - if err != nil { 121 - // non-fatal, TODO: log this 122 } 123 } 124 125 - return nil 126 } 127 128 func ingestPipeline(d *db.DB, source ec.Source, msg ec.Message) error {
··· 3 import ( 4 "context" 5 "encoding/json" 6 + "errors" 7 "fmt" 8 "slices" 9 "time" ··· 19 "tangled.sh/tangled.sh/core/workflow" 20 21 "github.com/bluesky-social/indigo/atproto/syntax" 22 + "github.com/go-git/go-git/v5/plumbing" 23 "github.com/posthog/posthog-go" 24 ) 25 ··· 41 42 cfg := ec.ConsumerConfig{ 43 Sources: srcs, 44 + ProcessFunc: knotIngester(d, enforcer, posthog, c.Core.Dev), 45 RetryInterval: c.Knotstream.RetryInterval, 46 MaxRetryInterval: c.Knotstream.MaxRetryInterval, 47 ConnectionTimeout: c.Knotstream.ConnectionTimeout, ··· 55 return ec.NewConsumer(cfg), nil 56 } 57 58 + func knotIngester(d *db.DB, enforcer *rbac.Enforcer, posthog posthog.Client, dev bool) ec.ProcessFunc { 59 return func(ctx context.Context, source ec.Source, msg ec.Message) error { 60 switch msg.Nsid { 61 case tangled.GitRefUpdateNSID: ··· 83 return fmt.Errorf("%s does not belong to %s, something is fishy", record.CommitterDid, source.Key()) 84 } 85 86 + err1 := populatePunchcard(d, record) 87 + err2 := updateRepoLanguages(d, record) 88 + 89 + var err3 error 90 + if !dev { 91 + err3 = pc.Enqueue(posthog.Capture{ 92 + DistinctId: record.CommitterDid, 93 + Event: "git_ref_update", 94 + }) 95 + } 96 + 97 + return errors.Join(err1, err2, err3) 98 + } 99 + 100 + func populatePunchcard(d *db.DB, record tangled.GitRefUpdate) error { 101 knownEmails, err := db.GetAllEmails(d, record.CommitterDid) 102 if err != nil { 103 return err 104 } 105 + 106 count := 0 107 for _, ke := range knownEmails { 108 if record.Meta == nil { ··· 126 Date: time.Now(), 127 Count: count, 128 } 129 + return db.AddPunch(d, punch) 130 + } 131 + 132 + func updateRepoLanguages(d *db.DB, record tangled.GitRefUpdate) error { 133 + if record.Meta == nil && record.Meta.LangBreakdown == nil { 134 + return fmt.Errorf("empty language data for repo: %s/%s", record.RepoDid, record.RepoName) 135 } 136 137 + repos, err := db.GetRepos( 138 + d, 139 + db.FilterEq("did", record.RepoDid), 140 + db.FilterEq("name", 
record.RepoName), 141 + ) 142 + if err != nil { 143 + return fmt.Errorf("failed to look for repo in DB (%s/%s): %w", record.RepoDid, record.RepoName, err) 144 + } 145 + if len(repos) != 1 { 146 + return fmt.Errorf("incorrect number of repos returned: %d (expected 1)", len(repos)) 147 + } 148 + repo := repos[0] 149 + 150 + ref := plumbing.ReferenceName(record.Ref) 151 + if !ref.IsBranch() { 152 + return fmt.Errorf("%s is not a valid reference name", ref) 153 + } 154 + 155 + var langs []db.RepoLanguage 156 + for _, l := range record.Meta.LangBreakdown.Inputs { 157 + if l == nil { 158 + continue 159 } 160 + 161 + langs = append(langs, db.RepoLanguage{ 162 + RepoAt: repo.RepoAt(), 163 + Ref: ref.Short(), 164 + Language: l.Lang, 165 + Bytes: l.Size, 166 + }) 167 } 168 169 + return db.InsertRepoLanguages(d, langs) 170 } 171 172 func ingestPipeline(d *db.DB, source ec.Source, msg ec.Message) error {