+1
-1
automod/consumer/firehose.go
···
116
fc.Logger.Info("hepa scheduler configured", "scheduler", "autoscaling", "initial", scaleSettings.Concurrency, "max", scaleSettings.MaxConcurrency)
117
}
118
119
-
return events.HandleRepoStream(ctx, con, scheduler)
120
}
121
122
// NOTE: for now, this function basically never errors, just logs and returns nil. Should think through error processing better.
···
116
fc.Logger.Info("hepa scheduler configured", "scheduler", "autoscaling", "initial", scaleSettings.Concurrency, "max", scaleSettings.MaxConcurrency)
117
}
118
119
+
return events.HandleRepoStream(ctx, con, scheduler, fc.Logger)
120
}
121
122
// NOTE: for now, this function basically never errors, just logs and returns nil. Should think through error processing better.
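The firehose consumer change above is purely about threading a logger through: events.HandleRepoStream now takes a *slog.Logger as a fourth argument (fc.Logger here, nil in bgs/fedmgr.go further down). A minimal caller sketch, assuming the usual indigo import paths and that a nil logger is defaulted inside the events package:

// Sketch only: shows the new call shape from this diff; types and import
// paths are assumed from context, not copied from the repository.
package consumer

import (
	"context"
	"log/slog"

	"github.com/bluesky-social/indigo/events"
	"github.com/gorilla/websocket"
)

func runStream(ctx context.Context, con *websocket.Conn, sched events.Scheduler, logger *slog.Logger) error {
	// HandleRepoStream now takes an explicit *slog.Logger; fedmgr.go passes
	// nil, so a nil logger appears to be accepted and defaulted internally.
	return events.HandleRepoStream(ctx, con, sched, logger)
}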
+1
-1
bgs/admin.go
+60
-56
bgs/bgs.go
···
6
"encoding/json"
7
"errors"
8
"fmt"
9
"net"
10
"net/http"
11
_ "net/http/pprof"
12
"net/url"
13
"strconv"
14
"strings"
15
"sync"
···
34
"github.com/gorilla/websocket"
35
"github.com/ipfs/go-cid"
36
ipld "github.com/ipfs/go-ipld-format"
37
-
logging "github.com/ipfs/go-log"
38
"github.com/labstack/echo/v4"
39
"github.com/labstack/echo/v4/middleware"
40
promclient "github.com/prometheus/client_golang/prometheus"
···
45
"gorm.io/gorm"
46
)
47
48
-
var log = logging.Logger("bgs")
49
var tracer = otel.Tracer("bgs")
50
51
// serverListenerBootTimeout is how long to wait for the requested server socket
···
95
// nextCrawlers gets forwarded POST /xrpc/com.atproto.sync.requestCrawl
96
nextCrawlers []*url.URL
97
httpClient http.Client
98
}
99
100
type PDSResync struct {
···
166
pdsResyncs: make(map[uint]*PDSResync),
167
168
userCache: uc,
169
}
170
171
ix.CreateExternalUser = bgs.createExternalUser
···
244
act, err := bgs.Index.GetUserOrMissing(ctx, did)
245
if err != nil {
246
w.WriteHeader(500)
247
-
log.Errorf("failed to get user: %s", err)
248
return
249
}
250
251
if err := bgs.Index.Crawler.Crawl(ctx, act); err != nil {
252
w.WriteHeader(500)
253
-
log.Errorf("failed to add user to crawler: %s", err)
254
return
255
}
256
})
···
335
if err2 := ctx.JSON(err.Code, map[string]any{
336
"error": err.Message,
337
}); err2 != nil {
338
-
log.Errorf("Failed to write http error: %s", err2)
339
}
340
default:
341
sendHeader := true
···
343
sendHeader = false
344
}
345
346
-
log.Warnf("HANDLER ERROR: (%s) %s", ctx.Path(), err)
347
348
if strings.HasPrefix(ctx.Path(), "/admin/") {
349
ctx.JSON(500, map[string]any{
···
436
437
func (bgs *BGS) HandleHealthCheck(c echo.Context) error {
438
if err := bgs.db.Exec("SELECT 1").Error; err != nil {
439
-
log.Errorf("healthcheck can't connect to database: %v", err)
440
return c.JSON(500, HealthStatus{Status: "error", Message: "can't connect to database"})
441
} else {
442
return c.JSON(200, HealthStatus{Status: "ok"})
···
603
604
var m = &dto.Metric{}
605
if err := c.EventsSent.Write(m); err != nil {
606
-
log.Errorf("failed to get sent counter: %s", err)
607
}
608
609
-
log.Infow("consumer disconnected",
610
"consumer_id", id,
611
"remote_addr", c.RemoteAddr,
612
"user_agent", c.UserAgent,
···
658
}
659
660
if err := conn.WriteControl(websocket.PingMessage, []byte{}, time.Now().Add(5*time.Second)); err != nil {
661
-
log.Warnf("failed to ping client: %s", err)
662
cancel()
663
return
664
}
···
683
for {
684
_, _, err := conn.ReadMessage()
685
if err != nil {
686
-
log.Warnf("failed to read message from client: %s", err)
687
cancel()
688
return
689
}
···
710
consumerID := bgs.registerConsumer(&consumer)
711
defer bgs.cleanupConsumer(consumerID)
712
713
-
logger := log.With(
714
"consumer_id", consumerID,
715
"remote_addr", consumer.RemoteAddr,
716
"user_agent", consumer.UserAgent,
717
)
718
719
-
logger.Infow("new consumer", "cursor", since)
720
721
for {
722
select {
···
728
729
wc, err := conn.NextWriter(websocket.BinaryMessage)
730
if err != nil {
731
-
logger.Errorf("failed to get next writer: %s", err)
732
return err
733
}
734
···
742
}
743
744
if err := wc.Close(); err != nil {
745
-
logger.Warnf("failed to flush-close our event write: %s", err)
746
return nil
747
}
748
···
763
// defensive in case things change under the hood.
764
registry, ok := promclient.DefaultRegisterer.(*promclient.Registry)
765
if !ok {
766
-
log.Warnf("failed to export default prometheus registry; some metrics will be unavailable; unexpected type: %T", promclient.DefaultRegisterer)
767
}
768
exporter, err := prometheus.NewExporter(prometheus.Options{
769
Registry: registry,
770
Namespace: "bigsky",
771
})
772
if err != nil {
773
-
log.Errorf("could not create the prometheus stats exporter: %v", err)
774
}
775
776
return exporter
···
885
case env.RepoCommit != nil:
886
repoCommitsReceivedCounter.WithLabelValues(host.Host).Add(1)
887
evt := env.RepoCommit
888
-
log.Debugw("bgs got repo append event", "seq", evt.Seq, "pdsHost", host.Host, "repo", evt.Repo)
889
890
s := time.Now()
891
u, err := bgs.lookupUserByDid(ctx, evt.Repo)
···
915
916
if u.GetTakenDown() || ustatus == events.AccountStatusTakendown {
917
span.SetAttributes(attribute.Bool("taken_down_by_relay_admin", u.GetTakenDown()))
918
-
log.Debugw("dropping commit event from taken down user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host)
919
repoCommitsResultCounter.WithLabelValues(host.Host, "tdu").Inc()
920
return nil
921
}
922
923
if ustatus == events.AccountStatusSuspended {
924
-
log.Debugw("dropping commit event from suspended user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host)
925
repoCommitsResultCounter.WithLabelValues(host.Host, "susu").Inc()
926
return nil
927
}
928
929
if ustatus == events.AccountStatusDeactivated {
930
-
log.Debugw("dropping commit event from deactivated user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host)
931
repoCommitsResultCounter.WithLabelValues(host.Host, "du").Inc()
932
return nil
933
}
···
938
}
939
940
if host.ID != u.PDS && u.PDS != 0 {
941
-
log.Warnw("received event for repo from different pds than expected", "repo", evt.Repo, "expPds", u.PDS, "gotPds", host.Host)
942
// Flush any cached DID documents for this user
943
bgs.didr.FlushCacheFor(env.RepoCommit.Repo)
944
···
1000
if errors.Is(err, carstore.ErrRepoBaseMismatch) || ipld.IsNotFound(err) {
1001
ai, lerr := bgs.Index.LookupUser(ctx, u.ID)
1002
if lerr != nil {
1003
-
log.Warnw("failed handling event, no user", "err", err, "pdsHost", host.Host, "seq", evt.Seq, "repo", u.Did, "prev", stringLink(evt.Prev), "commit", evt.Commit.String())
1004
repoCommitsResultCounter.WithLabelValues(host.Host, "nou4").Inc()
1005
return fmt.Errorf("failed to look up user %s (%d) (err case: %s): %w", u.Did, u.ID, err, lerr)
1006
}
1007
1008
span.SetAttributes(attribute.Bool("catchup_queue", true))
1009
1010
-
log.Infow("failed handling event, catchup", "err", err, "pdsHost", host.Host, "seq", evt.Seq, "repo", u.Did, "prev", stringLink(evt.Prev), "commit", evt.Commit.String())
1011
repoCommitsResultCounter.WithLabelValues(host.Host, "catchup2").Inc()
1012
return bgs.Index.Crawler.AddToCatchupQueue(ctx, host, ai, evt)
1013
}
1014
1015
-
log.Warnw("failed handling event", "err", err, "pdsHost", host.Host, "seq", evt.Seq, "repo", u.Did, "prev", stringLink(evt.Prev), "commit", evt.Commit.String())
1016
repoCommitsResultCounter.WithLabelValues(host.Host, "err").Inc()
1017
return fmt.Errorf("handle user event failed: %w", err)
1018
}
···
1020
repoCommitsResultCounter.WithLabelValues(host.Host, "ok").Inc()
1021
return nil
1022
case env.RepoHandle != nil:
1023
-
log.Infow("bgs got repo handle event", "did", env.RepoHandle.Did, "handle", env.RepoHandle.Handle)
1024
// Flush any cached DID documents for this user
1025
bgs.didr.FlushCacheFor(env.RepoHandle.Did)
1026
···
1031
}
1032
1033
if act.Handle.String != env.RepoHandle.Handle {
1034
-
log.Warnw("handle update did not update handle to asserted value", "did", env.RepoHandle.Did, "expected", env.RepoHandle.Handle, "actual", act.Handle)
1035
}
1036
1037
// TODO: Update the ReposHandle event type to include "verified" or something
···
1045
},
1046
})
1047
if err != nil {
1048
-
log.Errorw("failed to broadcast RepoHandle event", "error", err, "did", env.RepoHandle.Did, "handle", env.RepoHandle.Handle)
1049
return fmt.Errorf("failed to broadcast RepoHandle event: %w", err)
1050
}
1051
1052
return nil
1053
case env.RepoIdentity != nil:
1054
-
log.Infow("bgs got identity event", "did", env.RepoIdentity.Did)
1055
// Flush any cached DID documents for this user
1056
bgs.didr.FlushCacheFor(env.RepoIdentity.Did)
1057
···
1071
},
1072
})
1073
if err != nil {
1074
-
log.Errorw("failed to broadcast Identity event", "error", err, "did", env.RepoIdentity.Did)
1075
return fmt.Errorf("failed to broadcast Identity event: %w", err)
1076
}
1077
···
1087
span.SetAttributes(attribute.String("repo_status", *env.RepoAccount.Status))
1088
}
1089
1090
-
log.Infow("bgs got account event", "did", env.RepoAccount.Did)
1091
// Flush any cached DID documents for this user
1092
bgs.didr.FlushCacheFor(env.RepoAccount.Did)
1093
···
1101
// Check if the PDS is still authoritative
1102
// if not we don't want to be propagating this account event
1103
if ai.PDS != host.ID {
1104
-
log.Errorw("account event from non-authoritative pds",
1105
"seq", env.RepoAccount.Seq,
1106
"did", env.RepoAccount.Did,
1107
"event_from", host.Host,
···
1146
},
1147
})
1148
if err != nil {
1149
-
log.Errorw("failed to broadcast Account event", "error", err, "did", env.RepoAccount.Did)
1150
return fmt.Errorf("failed to broadcast Account event: %w", err)
1151
}
1152
···
1194
// delete data from carstore
1195
if err := bgs.repoman.TakeDownRepo(ctx, u.ID); err != nil {
1196
// don't let a failure here prevent us from propagating this event
1197
-
log.Errorf("failed to delete user data from carstore: %s", err)
1198
}
1199
1200
return bgs.events.AddEvent(ctx, &events.XRPCStreamEvent{
···
1209
1210
externalUserCreationAttempts.Inc()
1211
1212
-
log.Debugf("create external user: %s", did)
1213
doc, err := s.didr.GetDocument(ctx, did)
1214
if err != nil {
1215
return nil, fmt.Errorf("could not locate DID document for followed user (%s): %w", did, err)
···
1232
// TODO: the PDS's DID should also be in the service, we could use that to look up?
1233
var peering models.PDS
1234
if err := s.db.Find(&peering, "host = ?", durl.Host).Error; err != nil {
1235
-
log.Error("failed to find pds", durl.Host)
1236
return nil, err
1237
}
1238
···
1305
defer func() {
1306
if !successfullyCreated {
1307
if err := s.db.Model(&models.PDS{}).Where("id = ?", peering.ID).Update("repo_count", gorm.Expr("repo_count - 1")).Error; err != nil {
1308
-
log.Errorf("failed to decrement repo count for pds: %s", err)
1309
}
1310
}
1311
}()
···
1319
return nil, err
1320
}
1321
1322
-
log.Debugw("creating external user", "did", did, "handle", hurl.Host, "pds", peering.ID)
1323
1324
handle := hurl.Host
1325
···
1327
1328
resdid, err := s.hr.ResolveHandleToDid(ctx, handle)
1329
if err != nil {
1330
-
log.Errorf("failed to resolve users claimed handle (%q) on pds: %s", handle, err)
1331
validHandle = false
1332
}
1333
1334
if resdid != did {
1335
-
log.Errorf("claimed handle did not match servers response (%s != %s)", resdid, did)
1336
validHandle = false
1337
}
1338
···
1341
1342
exu, err := s.Index.LookupUserByDid(ctx, did)
1343
if err == nil {
1344
-
log.Debugw("lost the race to create a new user", "did", did, "handle", handle, "existing_hand", exu.Handle)
1345
if exu.PDS != peering.ID {
1346
// User is now on a different PDS, update
1347
if err := s.db.Model(User{}).Where("id = ?", exu.Uid).Update("pds", peering.ID).Error; err != nil {
···
1500
// delete data from carstore
1501
if err := bgs.repoman.TakeDownRepo(ctx, u.ID); err != nil {
1502
// don't let a failure here prevent us from propagating this event
1503
-
log.Errorf("failed to delete user data from carstore: %s", err)
1504
}
1505
}
1506
···
1607
func (bgs *BGS) ResyncPDS(ctx context.Context, pds models.PDS) error {
1608
ctx, span := tracer.Start(ctx, "ResyncPDS")
1609
defer span.End()
1610
-
log := log.With("pds", pds.Host, "source", "resync_pds")
1611
resync, found := bgs.LoadOrStoreResync(pds)
1612
if found {
1613
return fmt.Errorf("resync already in progress")
···
1639
for {
1640
pages++
1641
if pages%10 == 0 {
1642
-
log.Warnw("fetching PDS page during resync", "pages", pages, "total_repos", len(repos))
1643
resync.NumRepoPages = pages
1644
resync.NumRepos = len(repos)
1645
bgs.UpdateResync(resync)
1646
}
1647
if err := limiter.Wait(ctx); err != nil {
1648
-
log.Errorw("failed to wait for rate limiter", "error", err)
1649
return fmt.Errorf("failed to wait for rate limiter: %w", err)
1650
}
1651
repoList, err := comatproto.SyncListRepos(ctx, &xrpcc, cursor, limit)
1652
if err != nil {
1653
-
log.Errorw("failed to list repos", "error", err)
1654
return fmt.Errorf("failed to list repos: %w", err)
1655
}
1656
···
1672
1673
repolistDone := time.Now()
1674
1675
-
log.Warnw("listed all repos, checking roots", "num_repos", len(repos), "took", repolistDone.Sub(start))
1676
resync = bgs.SetResyncStatus(pds.ID, "checking revs")
1677
1678
// run loop over repos with some concurrency
···
1681
// Check repo revs against our local copy and enqueue crawls for any that are out of date
1682
for i, r := range repos {
1683
if err := sem.Acquire(ctx, 1); err != nil {
1684
-
log.Errorw("failed to acquire semaphore", "error", err)
1685
continue
1686
}
1687
go func(r comatproto.SyncListRepos_Repo) {
1688
defer sem.Release(1)
1689
-
log := log.With("did", r.Did, "remote_rev", r.Rev)
1690
// Fetches the user if we have it, otherwise automatically enqueues it for crawling
1691
ai, err := bgs.Index.GetUserOrMissing(ctx, r.Did)
1692
if err != nil {
1693
-
log.Errorw("failed to get user while resyncing PDS, we can't recrawl it", "error", err)
1694
return
1695
}
1696
1697
rev, err := bgs.repoman.GetRepoRev(ctx, ai.Uid)
1698
if err != nil {
1699
-
log.Warnw("recrawling because we failed to get the local repo root", "err", err, "uid", ai.Uid)
1700
err := bgs.Index.Crawler.Crawl(ctx, ai)
1701
if err != nil {
1702
-
log.Errorw("failed to enqueue crawl for repo during resync", "error", err, "uid", ai.Uid, "did", ai.Did)
1703
}
1704
return
1705
}
1706
1707
if rev == "" || rev < r.Rev {
1708
-
log.Warnw("recrawling because the repo rev from the PDS is newer than our local repo rev", "local_rev", rev)
1709
err := bgs.Index.Crawler.Crawl(ctx, ai)
1710
if err != nil {
1711
-
log.Errorw("failed to enqueue crawl for repo during resync", "error", err, "uid", ai.Uid, "did", ai.Did)
1712
}
1713
return
1714
}
1715
}(r)
1716
if i%100 == 0 {
1717
if i%10_000 == 0 {
1718
-
log.Warnw("checked revs during resync", "num_repos_checked", i, "num_repos_to_crawl", -1, "took", time.Now().Sub(resync.StatusChangedAt))
1719
}
1720
resync.NumReposChecked = i
1721
bgs.UpdateResync(resync)
···
1725
resync.NumReposChecked = len(repos)
1726
bgs.UpdateResync(resync)
1727
1728
-
log.Warnw("enqueued all crawls, exiting resync", "took", time.Now().Sub(start), "num_repos_to_crawl", -1)
1729
1730
return nil
1731
}
···
6
"encoding/json"
7
"errors"
8
"fmt"
9
+
"log/slog"
10
"net"
11
"net/http"
12
_ "net/http/pprof"
13
"net/url"
14
+
"reflect"
15
"strconv"
16
"strings"
17
"sync"
···
36
"github.com/gorilla/websocket"
37
"github.com/ipfs/go-cid"
38
ipld "github.com/ipfs/go-ipld-format"
39
"github.com/labstack/echo/v4"
40
"github.com/labstack/echo/v4/middleware"
41
promclient "github.com/prometheus/client_golang/prometheus"
···
46
"gorm.io/gorm"
47
)
48
49
var tracer = otel.Tracer("bgs")
50
51
// serverListenerBootTimeout is how long to wait for the requested server socket
···
95
// nextCrawlers gets forwarded POST /xrpc/com.atproto.sync.requestCrawl
96
nextCrawlers []*url.URL
97
httpClient http.Client
98
+
99
+
log *slog.Logger
100
}
101
102
type PDSResync struct {
···
168
pdsResyncs: make(map[uint]*PDSResync),
169
170
userCache: uc,
171
+
172
+
log: slog.Default().With("system", "bgs"),
173
}
174
175
ix.CreateExternalUser = bgs.createExternalUser
···
248
act, err := bgs.Index.GetUserOrMissing(ctx, did)
249
if err != nil {
250
w.WriteHeader(500)
251
+
bgs.log.Error("failed to get user", "err", err)
252
return
253
}
254
255
if err := bgs.Index.Crawler.Crawl(ctx, act); err != nil {
256
w.WriteHeader(500)
257
+
bgs.log.Error("failed to add user to crawler", "err", err)
258
return
259
}
260
})
···
339
if err2 := ctx.JSON(err.Code, map[string]any{
340
"error": err.Message,
341
}); err2 != nil {
342
+
bgs.log.Error("Failed to write http error", "err", err2)
343
}
344
default:
345
sendHeader := true
···
347
sendHeader = false
348
}
349
350
+
bgs.log.Warn("HANDLER ERROR: (%s) %s", ctx.Path(), err)
351
352
if strings.HasPrefix(ctx.Path(), "/admin/") {
353
ctx.JSON(500, map[string]any{
···
440
441
func (bgs *BGS) HandleHealthCheck(c echo.Context) error {
442
if err := bgs.db.Exec("SELECT 1").Error; err != nil {
443
+
bgs.log.Error("healthcheck can't connect to database", "err", err)
444
return c.JSON(500, HealthStatus{Status: "error", Message: "can't connect to database"})
445
} else {
446
return c.JSON(200, HealthStatus{Status: "ok"})
···
607
608
var m = &dto.Metric{}
609
if err := c.EventsSent.Write(m); err != nil {
610
+
bgs.log.Error("failed to get sent counter", "err", err)
611
}
612
613
+
bgs.log.Info("consumer disconnected",
614
"consumer_id", id,
615
"remote_addr", c.RemoteAddr,
616
"user_agent", c.UserAgent,
···
662
}
663
664
if err := conn.WriteControl(websocket.PingMessage, []byte{}, time.Now().Add(5*time.Second)); err != nil {
665
+
bgs.log.Warn("failed to ping client: %s", err)
666
cancel()
667
return
668
}
···
687
for {
688
_, _, err := conn.ReadMessage()
689
if err != nil {
690
+
bgs.log.Warn("failed to read message from client: %s", err)
691
cancel()
692
return
693
}
···
714
consumerID := bgs.registerConsumer(&consumer)
715
defer bgs.cleanupConsumer(consumerID)
716
717
+
logger := bgs.log.With(
718
"consumer_id", consumerID,
719
"remote_addr", consumer.RemoteAddr,
720
"user_agent", consumer.UserAgent,
721
)
722
723
+
logger.Info("new consumer", "cursor", since)
724
725
for {
726
select {
···
732
733
wc, err := conn.NextWriter(websocket.BinaryMessage)
734
if err != nil {
735
+
logger.Error("failed to get next writer", "err", err)
736
return err
737
}
738
···
746
}
747
748
if err := wc.Close(); err != nil {
749
+
logger.Warn("failed to flush-close our event write", "err", err)
750
return nil
751
}
752
···
767
// defensive in case things change under the hood.
768
registry, ok := promclient.DefaultRegisterer.(*promclient.Registry)
769
if !ok {
770
+
slog.Warn("failed to export default prometheus registry; some metrics will be unavailable; unexpected type", "type", reflect.TypeOf(promclient.DefaultRegisterer))
771
}
772
exporter, err := prometheus.NewExporter(prometheus.Options{
773
Registry: registry,
774
Namespace: "bigsky",
775
})
776
if err != nil {
777
+
slog.Error("could not create the prometheus stats exporter", "err", err, "system", "bgs")
778
}
779
780
return exporter
···
889
case env.RepoCommit != nil:
890
repoCommitsReceivedCounter.WithLabelValues(host.Host).Add(1)
891
evt := env.RepoCommit
892
+
bgs.log.Debug("bgs got repo append event", "seq", evt.Seq, "pdsHost", host.Host, "repo", evt.Repo)
893
894
s := time.Now()
895
u, err := bgs.lookupUserByDid(ctx, evt.Repo)
···
919
920
if u.GetTakenDown() || ustatus == events.AccountStatusTakendown {
921
span.SetAttributes(attribute.Bool("taken_down_by_relay_admin", u.GetTakenDown()))
922
+
bgs.log.Debug("dropping commit event from taken down user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host)
923
repoCommitsResultCounter.WithLabelValues(host.Host, "tdu").Inc()
924
return nil
925
}
926
927
if ustatus == events.AccountStatusSuspended {
928
+
bgs.log.Debug("dropping commit event from suspended user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host)
929
repoCommitsResultCounter.WithLabelValues(host.Host, "susu").Inc()
930
return nil
931
}
932
933
if ustatus == events.AccountStatusDeactivated {
934
+
bgs.log.Debug("dropping commit event from deactivated user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host)
935
repoCommitsResultCounter.WithLabelValues(host.Host, "du").Inc()
936
return nil
937
}
···
942
}
943
944
if host.ID != u.PDS && u.PDS != 0 {
945
+
bgs.log.Warn("received event for repo from different pds than expected", "repo", evt.Repo, "expPds", u.PDS, "gotPds", host.Host)
946
// Flush any cached DID documents for this user
947
bgs.didr.FlushCacheFor(env.RepoCommit.Repo)
948
···
1004
if errors.Is(err, carstore.ErrRepoBaseMismatch) || ipld.IsNotFound(err) {
1005
ai, lerr := bgs.Index.LookupUser(ctx, u.ID)
1006
if lerr != nil {
1007
+
log.Warn("failed handling event, no user", "err", err, "pdsHost", host.Host, "seq", evt.Seq, "repo", u.Did, "prev", stringLink(evt.Prev), "commit", evt.Commit.String())
1008
repoCommitsResultCounter.WithLabelValues(host.Host, "nou4").Inc()
1009
return fmt.Errorf("failed to look up user %s (%d) (err case: %s): %w", u.Did, u.ID, err, lerr)
1010
}
1011
1012
span.SetAttributes(attribute.Bool("catchup_queue", true))
1013
1014
+
log.Info("failed handling event, catchup", "err", err, "pdsHost", host.Host, "seq", evt.Seq, "repo", u.Did, "prev", stringLink(evt.Prev), "commit", evt.Commit.String())
1015
repoCommitsResultCounter.WithLabelValues(host.Host, "catchup2").Inc()
1016
return bgs.Index.Crawler.AddToCatchupQueue(ctx, host, ai, evt)
1017
}
1018
1019
+
log.Warn("failed handling event", "err", err, "pdsHost", host.Host, "seq", evt.Seq, "repo", u.Did, "prev", stringLink(evt.Prev), "commit", evt.Commit.String())
1020
repoCommitsResultCounter.WithLabelValues(host.Host, "err").Inc()
1021
return fmt.Errorf("handle user event failed: %w", err)
1022
}
···
1024
repoCommitsResultCounter.WithLabelValues(host.Host, "ok").Inc()
1025
return nil
1026
case env.RepoHandle != nil:
1027
+
bgs.log.Info("bgs got repo handle event", "did", env.RepoHandle.Did, "handle", env.RepoHandle.Handle)
1028
// Flush any cached DID documents for this user
1029
bgs.didr.FlushCacheFor(env.RepoHandle.Did)
1030
···
1035
}
1036
1037
if act.Handle.String != env.RepoHandle.Handle {
1038
+
bgs.log.Warn("handle update did not update handle to asserted value", "did", env.RepoHandle.Did, "expected", env.RepoHandle.Handle, "actual", act.Handle)
1039
}
1040
1041
// TODO: Update the ReposHandle event type to include "verified" or something
···
1049
},
1050
})
1051
if err != nil {
1052
+
bgs.log.Error("failed to broadcast RepoHandle event", "error", err, "did", env.RepoHandle.Did, "handle", env.RepoHandle.Handle)
1053
return fmt.Errorf("failed to broadcast RepoHandle event: %w", err)
1054
}
1055
1056
return nil
1057
case env.RepoIdentity != nil:
1058
+
bgs.log.Info("bgs got identity event", "did", env.RepoIdentity.Did)
1059
// Flush any cached DID documents for this user
1060
bgs.didr.FlushCacheFor(env.RepoIdentity.Did)
1061
···
1075
},
1076
})
1077
if err != nil {
1078
+
bgs.log.Error("failed to broadcast Identity event", "error", err, "did", env.RepoIdentity.Did)
1079
return fmt.Errorf("failed to broadcast Identity event: %w", err)
1080
}
1081
···
1091
span.SetAttributes(attribute.String("repo_status", *env.RepoAccount.Status))
1092
}
1093
1094
+
bgs.log.Info("bgs got account event", "did", env.RepoAccount.Did)
1095
// Flush any cached DID documents for this user
1096
bgs.didr.FlushCacheFor(env.RepoAccount.Did)
1097
···
1105
// Check if the PDS is still authoritative
1106
// if not we don't want to be propagating this account event
1107
if ai.PDS != host.ID {
1108
+
bgs.log.Error("account event from non-authoritative pds",
1109
"seq", env.RepoAccount.Seq,
1110
"did", env.RepoAccount.Did,
1111
"event_from", host.Host,
···
1150
},
1151
})
1152
if err != nil {
1153
+
bgs.log.Error("failed to broadcast Account event", "error", err, "did", env.RepoAccount.Did)
1154
return fmt.Errorf("failed to broadcast Account event: %w", err)
1155
}
1156
···
1198
// delete data from carstore
1199
if err := bgs.repoman.TakeDownRepo(ctx, u.ID); err != nil {
1200
// don't let a failure here prevent us from propagating this event
1201
+
bgs.log.Error("failed to delete user data from carstore", "err", err)
1202
}
1203
1204
return bgs.events.AddEvent(ctx, &events.XRPCStreamEvent{
···
1213
1214
externalUserCreationAttempts.Inc()
1215
1216
+
s.log.Debug("create external user", "did", did)
1217
doc, err := s.didr.GetDocument(ctx, did)
1218
if err != nil {
1219
return nil, fmt.Errorf("could not locate DID document for followed user (%s): %w", did, err)
···
1236
// TODO: the PDS's DID should also be in the service, we could use that to look up?
1237
var peering models.PDS
1238
if err := s.db.Find(&peering, "host = ?", durl.Host).Error; err != nil {
1239
+
s.log.Error("failed to find pds", "host", durl.Host)
1240
return nil, err
1241
}
1242
···
1309
defer func() {
1310
if !successfullyCreated {
1311
if err := s.db.Model(&models.PDS{}).Where("id = ?", peering.ID).Update("repo_count", gorm.Expr("repo_count - 1")).Error; err != nil {
1312
+
s.log.Error("failed to decrement repo count for pds", "err", err)
1313
}
1314
}
1315
}()
···
1323
return nil, err
1324
}
1325
1326
+
s.log.Debug("creating external user", "did", did, "handle", hurl.Host, "pds", peering.ID)
1327
1328
handle := hurl.Host
1329
···
1331
1332
resdid, err := s.hr.ResolveHandleToDid(ctx, handle)
1333
if err != nil {
1334
+
s.log.Error("failed to resolve users claimed handle on pds", "handle", handle, "err", err)
1335
validHandle = false
1336
}
1337
1338
if resdid != did {
1339
+
s.log.Error("claimed handle did not match servers response", "resdid", resdid, "did", did)
1340
validHandle = false
1341
}
1342
···
1345
1346
exu, err := s.Index.LookupUserByDid(ctx, did)
1347
if err == nil {
1348
+
s.log.Debug("lost the race to create a new user", "did", did, "handle", handle, "existing_hand", exu.Handle)
1349
if exu.PDS != peering.ID {
1350
// User is now on a different PDS, update
1351
if err := s.db.Model(User{}).Where("id = ?", exu.Uid).Update("pds", peering.ID).Error; err != nil {
···
1504
// delete data from carstore
1505
if err := bgs.repoman.TakeDownRepo(ctx, u.ID); err != nil {
1506
// don't let a failure here prevent us from propagating this event
1507
+
bgs.log.Error("failed to delete user data from carstore", "err", err)
1508
}
1509
}
1510
···
1611
func (bgs *BGS) ResyncPDS(ctx context.Context, pds models.PDS) error {
1612
ctx, span := tracer.Start(ctx, "ResyncPDS")
1613
defer span.End()
1614
+
log := bgs.log.With("pds", pds.Host, "source", "resync_pds")
1615
resync, found := bgs.LoadOrStoreResync(pds)
1616
if found {
1617
return fmt.Errorf("resync already in progress")
···
1643
for {
1644
pages++
1645
if pages%10 == 0 {
1646
+
log.Warn("fetching PDS page during resync", "pages", pages, "total_repos", len(repos))
1647
resync.NumRepoPages = pages
1648
resync.NumRepos = len(repos)
1649
bgs.UpdateResync(resync)
1650
}
1651
if err := limiter.Wait(ctx); err != nil {
1652
+
log.Error("failed to wait for rate limiter", "error", err)
1653
return fmt.Errorf("failed to wait for rate limiter: %w", err)
1654
}
1655
repoList, err := comatproto.SyncListRepos(ctx, &xrpcc, cursor, limit)
1656
if err != nil {
1657
+
log.Error("failed to list repos", "error", err)
1658
return fmt.Errorf("failed to list repos: %w", err)
1659
}
1660
···
1676
1677
repolistDone := time.Now()
1678
1679
+
log.Warn("listed all repos, checking roots", "num_repos", len(repos), "took", repolistDone.Sub(start))
1680
resync = bgs.SetResyncStatus(pds.ID, "checking revs")
1681
1682
// run loop over repos with some concurrency
···
1685
// Check repo revs against our local copy and enqueue crawls for any that are out of date
1686
for i, r := range repos {
1687
if err := sem.Acquire(ctx, 1); err != nil {
1688
+
log.Error("failed to acquire semaphore", "error", err)
1689
continue
1690
}
1691
go func(r comatproto.SyncListRepos_Repo) {
1692
defer sem.Release(1)
1693
+
log := bgs.log.With("did", r.Did, "remote_rev", r.Rev)
1694
// Fetches the user if we have it, otherwise automatically enqueues it for crawling
1695
ai, err := bgs.Index.GetUserOrMissing(ctx, r.Did)
1696
if err != nil {
1697
+
log.Error("failed to get user while resyncing PDS, we can't recrawl it", "error", err)
1698
return
1699
}
1700
1701
rev, err := bgs.repoman.GetRepoRev(ctx, ai.Uid)
1702
if err != nil {
1703
+
log.Warn("recrawling because we failed to get the local repo root", "err", err, "uid", ai.Uid)
1704
err := bgs.Index.Crawler.Crawl(ctx, ai)
1705
if err != nil {
1706
+
log.Error("failed to enqueue crawl for repo during resync", "error", err, "uid", ai.Uid, "did", ai.Did)
1707
}
1708
return
1709
}
1710
1711
if rev == "" || rev < r.Rev {
1712
+
log.Warn("recrawling because the repo rev from the PDS is newer than our local repo rev", "local_rev", rev)
1713
err := bgs.Index.Crawler.Crawl(ctx, ai)
1714
if err != nil {
1715
+
log.Error("failed to enqueue crawl for repo during resync", "error", err, "uid", ai.Uid, "did", ai.Did)
1716
}
1717
return
1718
}
1719
}(r)
1720
if i%100 == 0 {
1721
if i%10_000 == 0 {
1722
+
log.Warn("checked revs during resync", "num_repos_checked", i, "num_repos_to_crawl", -1, "took", time.Now().Sub(resync.StatusChangedAt))
1723
}
1724
resync.NumReposChecked = i
1725
bgs.UpdateResync(resync)
···
1729
resync.NumReposChecked = len(repos)
1730
bgs.UpdateResync(resync)
1731
1732
+
bgs.log.Warn("enqueued all crawls, exiting resync", "took", time.Now().Sub(start), "num_repos_to_crawl", -1)
1733
1734
return nil
1735
}
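Almost every hunk in bgs.go follows the same mechanical translation: go-log's sugared helpers (Errorf/Warnf and Errorw/Warnw/Infow/Debugw) become slog level methods with alternating key/value attributes, and component tagging moves from logging.Logger("bgs") to With("system", "bgs"). A small standalone sketch of that mapping (illustrative only, not code from the repository):

package main

import (
	"errors"
	"log/slog"
)

func main() {
	// logging.Logger("bgs") becomes a slog.Logger tagged at construction.
	log := slog.Default().With("system", "bgs")
	err := errors.New("connection refused")

	// Printf-style sugared calls drop their format verbs and pass
	// attributes as alternating key/value pairs instead:
	//   log.Errorf("failed to get user: %s", err)
	log.Error("failed to get user", "err", err)

	// The *w variants already took key/value pairs, so only the method
	// name changes:
	//   log.Warnw("dialing failed", "pdsHost", host, "err", err)
	log.Warn("dialing failed", "pdsHost", "pds.example.com", "err", err, "backoff", 3)
}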
+6
-6
bgs/compactor.go
···
210
}
211
if c.requeueInterval > 0 {
212
go func() {
213
-
log.Infow("starting compactor requeue routine",
214
"interval", c.requeueInterval,
215
"limit", c.requeueLimit,
216
"shardCount", c.requeueShardCount,
···
226
ctx := context.Background()
227
ctx, span := otel.Tracer("compactor").Start(ctx, "RequeueRoutine")
228
if err := c.EnqueueAllRepos(ctx, bgs, c.requeueLimit, c.requeueShardCount, c.requeueFast); err != nil {
229
-
log.Errorw("failed to enqueue all repos", "err", err)
230
}
231
span.End()
232
}
···
262
time.Sleep(time.Second * 5)
263
continue
264
}
265
-
log.Errorw("failed to compact repo",
266
"err", err,
267
"uid", state.latestUID,
268
"repo", state.latestDID,
···
273
// Pause for a bit to avoid spamming failed compactions
274
time.Sleep(time.Millisecond * 100)
275
} else {
276
-
log.Infow("compacted repo",
277
"uid", state.latestUID,
278
"repo", state.latestDID,
279
"status", state.status,
···
352
func (c *Compactor) EnqueueRepo(ctx context.Context, user *User, fast bool) {
353
ctx, span := otel.Tracer("compactor").Start(ctx, "EnqueueRepo")
354
defer span.End()
355
-
log.Infow("enqueueing compaction for repo", "repo", user.Did, "uid", user.ID, "fast", fast)
356
c.q.Append(user.ID, fast)
357
}
358
···
396
c.q.Append(r.Usr, fast)
397
}
398
399
-
log.Infow("done enqueueing all repos", "repos_enqueued", len(repos))
400
401
return nil
402
}
···
210
}
211
if c.requeueInterval > 0 {
212
go func() {
213
+
log.Info("starting compactor requeue routine",
214
"interval", c.requeueInterval,
215
"limit", c.requeueLimit,
216
"shardCount", c.requeueShardCount,
···
226
ctx := context.Background()
227
ctx, span := otel.Tracer("compactor").Start(ctx, "RequeueRoutine")
228
if err := c.EnqueueAllRepos(ctx, bgs, c.requeueLimit, c.requeueShardCount, c.requeueFast); err != nil {
229
+
log.Error("failed to enqueue all repos", "err", err)
230
}
231
span.End()
232
}
···
262
time.Sleep(time.Second * 5)
263
continue
264
}
265
+
log.Error("failed to compact repo",
266
"err", err,
267
"uid", state.latestUID,
268
"repo", state.latestDID,
···
273
// Pause for a bit to avoid spamming failed compactions
274
time.Sleep(time.Millisecond * 100)
275
} else {
276
+
log.Info("compacted repo",
277
"uid", state.latestUID,
278
"repo", state.latestDID,
279
"status", state.status,
···
352
func (c *Compactor) EnqueueRepo(ctx context.Context, user *User, fast bool) {
353
ctx, span := otel.Tracer("compactor").Start(ctx, "EnqueueRepo")
354
defer span.End()
355
+
log.Info("enqueueing compaction for repo", "repo", user.Did, "uid", user.ID, "fast", fast)
356
c.q.Append(user.ID, fast)
357
}
358
···
396
c.q.Append(r.Usr, fast)
397
}
398
399
+
log.Info("done enqueueing all repos", "repos_enqueued", len(repos))
400
401
return nil
402
}
+26
-23
bgs/fedmgr.go
···
4
"context"
5
"errors"
6
"fmt"
7
"math/rand"
8
"strings"
9
"sync"
···
21
pq "github.com/lib/pq"
22
"gorm.io/gorm"
23
)
24
25
type IndexCallback func(context.Context, *models.PDS, *events.XRPCStreamEvent) error
26
···
129
var errs []error
130
if errs = s.flushCursors(ctx); len(errs) > 0 {
131
for _, err := range errs {
132
-
log.Errorf("failed to flush cursors on shutdown: %s", err)
133
}
134
}
135
log.Info("done flushing PDS cursors on shutdown")
···
142
defer span.End()
143
if errs := s.flushCursors(ctx); len(errs) > 0 {
144
for _, err := range errs {
145
-
log.Errorf("failed to flush cursors: %s", err)
146
}
147
}
148
log.Debug("done flushing PDS cursors")
···
210
errs := <-s.shutdownResult
211
if len(errs) > 0 {
212
for _, err := range errs {
213
-
log.Errorf("shutdown error: %s", err)
214
}
215
}
216
log.Info("slurper shutdown complete")
···
490
url := fmt.Sprintf("%s://%s/xrpc/com.atproto.sync.subscribeRepos?cursor=%d", protocol, host.Host, cursor)
491
con, res, err := d.DialContext(ctx, url, nil)
492
if err != nil {
493
-
log.Warnw("dialing failed", "pdsHost", host.Host, "err", err, "backoff", backoff)
494
time.Sleep(sleepForBackoff(backoff))
495
backoff++
496
497
if backoff > 15 {
498
-
log.Warnw("pds does not appear to be online, disabling for now", "pdsHost", host.Host)
499
if err := s.db.Model(&models.PDS{}).Where("id = ?", host.ID).Update("registered", false).Error; err != nil {
500
-
log.Errorf("failed to unregister failing pds: %w", err)
501
}
502
503
return
···
506
continue
507
}
508
509
-
log.Info("event subscription response code: ", res.StatusCode)
510
511
curCursor := cursor
512
if err := s.handleConnection(ctx, host, con, &cursor, sub); err != nil {
513
if errors.Is(err, ErrTimeoutShutdown) {
514
-
log.Infof("shutting down pds subscription to %s, no activity after %s", host.Host, EventsTimeout)
515
return
516
}
517
-
log.Warnf("connection to %q failed: %s", host.Host, err)
518
}
519
520
if cursor > curCursor {
···
545
546
rsc := &events.RepoStreamCallbacks{
547
RepoCommit: func(evt *comatproto.SyncSubscribeRepos_Commit) error {
548
-
log.Debugw("got remote repo event", "pdsHost", host.Host, "repo", evt.Repo, "seq", evt.Seq)
549
if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{
550
RepoCommit: evt,
551
}); err != nil {
552
-
log.Errorf("failed handling event from %q (%d): %s", host.Host, evt.Seq, err)
553
}
554
*lastCursor = evt.Seq
555
···
560
return nil
561
},
562
RepoHandle: func(evt *comatproto.SyncSubscribeRepos_Handle) error {
563
-
log.Infow("got remote handle update event", "pdsHost", host.Host, "did", evt.Did, "handle", evt.Handle)
564
if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{
565
RepoHandle: evt,
566
}); err != nil {
567
-
log.Errorf("failed handling event from %q (%d): %s", host.Host, evt.Seq, err)
568
}
569
*lastCursor = evt.Seq
570
···
575
return nil
576
},
577
RepoMigrate: func(evt *comatproto.SyncSubscribeRepos_Migrate) error {
578
-
log.Infow("got remote repo migrate event", "pdsHost", host.Host, "did", evt.Did, "migrateTo", evt.MigrateTo)
579
if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{
580
RepoMigrate: evt,
581
}); err != nil {
582
-
log.Errorf("failed handling event from %q (%d): %s", host.Host, evt.Seq, err)
583
}
584
*lastCursor = evt.Seq
585
···
590
return nil
591
},
592
RepoTombstone: func(evt *comatproto.SyncSubscribeRepos_Tombstone) error {
593
-
log.Infow("got remote repo tombstone event", "pdsHost", host.Host, "did", evt.Did)
594
if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{
595
RepoTombstone: evt,
596
}); err != nil {
597
-
log.Errorf("failed handling event from %q (%d): %s", host.Host, evt.Seq, err)
598
}
599
*lastCursor = evt.Seq
600
···
605
return nil
606
},
607
RepoInfo: func(info *comatproto.SyncSubscribeRepos_Info) error {
608
-
log.Infow("info event", "name", info.Name, "message", info.Message, "pdsHost", host.Host)
609
return nil
610
},
611
RepoIdentity: func(ident *comatproto.SyncSubscribeRepos_Identity) error {
612
-
log.Infow("identity event", "did", ident.Did)
613
if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{
614
RepoIdentity: ident,
615
}); err != nil {
616
-
log.Errorf("failed handling event from %q (%d): %s", host.Host, ident.Seq, err)
617
}
618
*lastCursor = ident.Seq
619
···
624
return nil
625
},
626
RepoAccount: func(acct *comatproto.SyncSubscribeRepos_Account) error {
627
-
log.Infow("account event", "did", acct.Did, "status", acct.Status)
628
if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{
629
RepoAccount: acct,
630
}); err != nil {
631
-
log.Errorf("failed handling event from %q (%d): %s", host.Host, acct.Seq, err)
632
}
633
*lastCursor = acct.Seq
634
···
671
con.RemoteAddr().String(),
672
instrumentedRSC.EventHandler,
673
)
674
-
return events.HandleRepoStream(ctx, con, pool)
675
}
676
677
func (s *Slurper) updateCursor(sub *activeSub, curs int64) error {
···
4
"context"
5
"errors"
6
"fmt"
7
+
"log/slog"
8
"math/rand"
9
"strings"
10
"sync"
···
22
pq "github.com/lib/pq"
23
"gorm.io/gorm"
24
)
25
+
26
+
var log = slog.Default().With("system", "bgs")
27
28
type IndexCallback func(context.Context, *models.PDS, *events.XRPCStreamEvent) error
29
···
132
var errs []error
133
if errs = s.flushCursors(ctx); len(errs) > 0 {
134
for _, err := range errs {
135
+
log.Error("failed to flush cursors on shutdown", "err", err)
136
}
137
}
138
log.Info("done flushing PDS cursors on shutdown")
···
145
defer span.End()
146
if errs := s.flushCursors(ctx); len(errs) > 0 {
147
for _, err := range errs {
148
+
log.Error("failed to flush cursors", "err", err)
149
}
150
}
151
log.Debug("done flushing PDS cursors")
···
213
errs := <-s.shutdownResult
214
if len(errs) > 0 {
215
for _, err := range errs {
216
+
log.Error("shutdown error", "err", err)
217
}
218
}
219
log.Info("slurper shutdown complete")
···
493
url := fmt.Sprintf("%s://%s/xrpc/com.atproto.sync.subscribeRepos?cursor=%d", protocol, host.Host, cursor)
494
con, res, err := d.DialContext(ctx, url, nil)
495
if err != nil {
496
+
log.Warn("dialing failed", "pdsHost", host.Host, "err", err, "backoff", backoff)
497
time.Sleep(sleepForBackoff(backoff))
498
backoff++
499
500
if backoff > 15 {
501
+
log.Warn("pds does not appear to be online, disabling for now", "pdsHost", host.Host)
502
if err := s.db.Model(&models.PDS{}).Where("id = ?", host.ID).Update("registered", false).Error; err != nil {
503
+
log.Error("failed to unregister failing pds", "err", err)
504
}
505
506
return
···
509
continue
510
}
511
512
+
log.Info("event subscription response", "code", res.StatusCode)
513
514
curCursor := cursor
515
if err := s.handleConnection(ctx, host, con, &cursor, sub); err != nil {
516
if errors.Is(err, ErrTimeoutShutdown) {
517
+
log.Info("shutting down pds subscription after timeout", "host", host.Host, "time", EventsTimeout)
518
return
519
}
520
+
log.Warn("connection to failed", "host", host.Host, "err", err)
521
}
522
523
if cursor > curCursor {
···
548
549
rsc := &events.RepoStreamCallbacks{
550
RepoCommit: func(evt *comatproto.SyncSubscribeRepos_Commit) error {
551
+
log.Debug("got remote repo event", "pdsHost", host.Host, "repo", evt.Repo, "seq", evt.Seq)
552
if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{
553
RepoCommit: evt,
554
}); err != nil {
555
+
log.Error("failed handling event", "host", host.Host, "seq", evt.Seq, "err", err)
556
}
557
*lastCursor = evt.Seq
558
···
563
return nil
564
},
565
RepoHandle: func(evt *comatproto.SyncSubscribeRepos_Handle) error {
566
+
log.Info("got remote handle update event", "pdsHost", host.Host, "did", evt.Did, "handle", evt.Handle)
567
if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{
568
RepoHandle: evt,
569
}); err != nil {
570
+
log.Error("failed handling event", "host", host.Host, "seq", evt.Seq, "err", err)
571
}
572
*lastCursor = evt.Seq
573
···
578
return nil
579
},
580
RepoMigrate: func(evt *comatproto.SyncSubscribeRepos_Migrate) error {
581
+
log.Info("got remote repo migrate event", "pdsHost", host.Host, "did", evt.Did, "migrateTo", evt.MigrateTo)
582
if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{
583
RepoMigrate: evt,
584
}); err != nil {
585
+
log.Error("failed handling event", "host", host.Host, "seq", evt.Seq, "err", err)
586
}
587
*lastCursor = evt.Seq
588
···
593
return nil
594
},
595
RepoTombstone: func(evt *comatproto.SyncSubscribeRepos_Tombstone) error {
596
+
log.Info("got remote repo tombstone event", "pdsHost", host.Host, "did", evt.Did)
597
if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{
598
RepoTombstone: evt,
599
}); err != nil {
600
+
log.Error("failed handling event", "host", host.Host, "seq", evt.Seq, "err", err)
601
}
602
*lastCursor = evt.Seq
603
···
608
return nil
609
},
610
RepoInfo: func(info *comatproto.SyncSubscribeRepos_Info) error {
611
+
log.Info("info event", "name", info.Name, "message", info.Message, "pdsHost", host.Host)
612
return nil
613
},
614
RepoIdentity: func(ident *comatproto.SyncSubscribeRepos_Identity) error {
615
+
log.Info("identity event", "did", ident.Did)
616
if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{
617
RepoIdentity: ident,
618
}); err != nil {
619
+
log.Error("failed handling event", "host", host.Host, "seq", ident.Seq, "err", err)
620
}
621
*lastCursor = ident.Seq
622
···
627
return nil
628
},
629
RepoAccount: func(acct *comatproto.SyncSubscribeRepos_Account) error {
630
+
log.Info("account event", "did", acct.Did, "status", acct.Status)
631
if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{
632
RepoAccount: acct,
633
}); err != nil {
634
+
log.Error("failed handling event", "host", host.Host, "seq", acct.Seq, "err", err)
635
}
636
*lastCursor = acct.Seq
637
···
674
con.RemoteAddr().String(),
675
instrumentedRSC.EventHandler,
676
)
677
+
return events.HandleRepoStream(ctx, con, pool, nil)
678
}
679
680
func (s *Slurper) updateCursor(sub *activeSub, curs int64) error {
+13
-13
bgs/handlers.go
···
31
if errors.Is(err, gorm.ErrRecordNotFound) {
32
return nil, echo.NewHTTPError(http.StatusNotFound, "user not found")
33
}
34
-
log.Errorw("failed to lookup user", "err", err, "did", did)
35
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to lookup user")
36
}
37
···
61
if errors.Is(err, mst.ErrNotFound) {
62
return nil, echo.NewHTTPError(http.StatusNotFound, "record not found in repo")
63
}
64
-
log.Errorw("failed to get record from repo", "err", err, "did", did, "collection", collection, "rkey", rkey)
65
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to get record from repo")
66
}
67
···
89
if errors.Is(err, gorm.ErrRecordNotFound) {
90
return nil, echo.NewHTTPError(http.StatusNotFound, "user not found")
91
}
92
-
log.Errorw("failed to lookup user", "err", err, "did", did)
93
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to lookup user")
94
}
95
···
117
// TODO: stream the response
118
buf := new(bytes.Buffer)
119
if err := s.repoman.ReadRepo(ctx, u.ID, since, buf); err != nil {
120
-
log.Errorw("failed to read repo into buffer", "err", err, "did", did)
121
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to read repo into buffer")
122
}
123
···
170
return echo.NewHTTPError(http.StatusUnauthorized, "domain is banned")
171
}
172
173
-
log.Warnf("TODO: better host validation for crawl requests")
174
175
clientHost := fmt.Sprintf("%s://%s", u.Scheme, host)
176
···
191
if len(s.nextCrawlers) != 0 {
192
blob, err := json.Marshal(body)
193
if err != nil {
194
-
log.Warnw("could not forward requestCrawl, json err", "err", err)
195
} else {
196
go func(bodyBlob []byte) {
197
for _, rpu := range s.nextCrawlers {
···
201
response.Body.Close()
202
}
203
if err != nil || response == nil {
204
-
log.Warnw("requestCrawl forward failed", "host", rpu, "err", err)
205
} else if response.StatusCode != http.StatusOK {
206
-
log.Warnw("requestCrawl forward failed", "host", rpu, "status", response.Status)
207
} else {
208
-
log.Infow("requestCrawl forward successful", "host", rpu)
209
}
210
}
211
}(blob)
···
231
if err == gorm.ErrRecordNotFound {
232
return &comatprototypes.SyncListRepos_Output{}, nil
233
}
234
-
log.Errorw("failed to query users", "err", err)
235
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to query users")
236
}
237
···
252
253
root, err := s.repoman.GetRepoRoot(ctx, user.ID)
254
if err != nil {
255
-
log.Errorw("failed to get repo root", "err", err, "did", user.Did)
256
return nil, echo.NewHTTPError(http.StatusInternalServerError, fmt.Sprintf("failed to get repo root for (%s): %v", user.Did, err.Error()))
257
}
258
···
303
304
root, err := s.repoman.GetRepoRoot(ctx, u.ID)
305
if err != nil {
306
-
log.Errorw("failed to get repo root", "err", err, "did", u.Did)
307
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to get repo root")
308
}
309
310
rev, err := s.repoman.GetRepoRev(ctx, u.ID)
311
if err != nil {
312
-
log.Errorw("failed to get repo rev", "err", err, "did", u.Did)
313
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to get repo rev")
314
}
315
···
31
if errors.Is(err, gorm.ErrRecordNotFound) {
32
return nil, echo.NewHTTPError(http.StatusNotFound, "user not found")
33
}
34
+
log.Error("failed to lookup user", "err", err, "did", did)
35
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to lookup user")
36
}
37
···
61
if errors.Is(err, mst.ErrNotFound) {
62
return nil, echo.NewHTTPError(http.StatusNotFound, "record not found in repo")
63
}
64
+
log.Error("failed to get record from repo", "err", err, "did", did, "collection", collection, "rkey", rkey)
65
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to get record from repo")
66
}
67
···
89
if errors.Is(err, gorm.ErrRecordNotFound) {
90
return nil, echo.NewHTTPError(http.StatusNotFound, "user not found")
91
}
92
+
log.Error("failed to lookup user", "err", err, "did", did)
93
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to lookup user")
94
}
95
···
117
// TODO: stream the response
118
buf := new(bytes.Buffer)
119
if err := s.repoman.ReadRepo(ctx, u.ID, since, buf); err != nil {
120
+
log.Error("failed to read repo into buffer", "err", err, "did", did)
121
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to read repo into buffer")
122
}
123
···
170
return echo.NewHTTPError(http.StatusUnauthorized, "domain is banned")
171
}
172
173
+
log.Warn("TODO: better host validation for crawl requests")
174
175
clientHost := fmt.Sprintf("%s://%s", u.Scheme, host)
176
···
191
if len(s.nextCrawlers) != 0 {
192
blob, err := json.Marshal(body)
193
if err != nil {
194
+
log.Warn("could not forward requestCrawl, json err", "err", err)
195
} else {
196
go func(bodyBlob []byte) {
197
for _, rpu := range s.nextCrawlers {
···
201
response.Body.Close()
202
}
203
if err != nil || response == nil {
204
+
log.Warn("requestCrawl forward failed", "host", rpu, "err", err)
205
} else if response.StatusCode != http.StatusOK {
206
+
log.Warn("requestCrawl forward failed", "host", rpu, "status", response.Status)
207
} else {
208
+
log.Info("requestCrawl forward successful", "host", rpu)
209
}
210
}
211
}(blob)
···
231
if err == gorm.ErrRecordNotFound {
232
return &comatprototypes.SyncListRepos_Output{}, nil
233
}
234
+
log.Error("failed to query users", "err", err)
235
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to query users")
236
}
237
···
252
253
root, err := s.repoman.GetRepoRoot(ctx, user.ID)
254
if err != nil {
255
+
log.Error("failed to get repo root", "err", err, "did", user.Did)
256
return nil, echo.NewHTTPError(http.StatusInternalServerError, fmt.Sprintf("failed to get repo root for (%s): %v", user.Did, err.Error()))
257
}
258
···
303
304
root, err := s.repoman.GetRepoRoot(ctx, u.ID)
305
if err != nil {
306
+
log.Error("failed to get repo root", "err", err, "did", u.Did)
307
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to get repo root")
308
}
309
310
rev, err := s.repoman.GetRepoRev(ctx, u.ID)
311
if err != nil {
312
+
log.Error("failed to get repo rev", "err", err, "did", u.Did)
313
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to get repo rev")
314
}
315
+9
-8
carstore/bs.go
···
6
"context"
7
"fmt"
8
"io"
9
"os"
10
"path/filepath"
11
"sort"
···
24
cbor "github.com/ipfs/go-ipld-cbor"
25
ipld "github.com/ipfs/go-ipld-format"
26
"github.com/ipfs/go-libipfs/blocks"
27
-
logging "github.com/ipfs/go-log"
28
car "github.com/ipld/go-car"
29
carutil "github.com/ipld/go-car/util"
30
cbg "github.com/whyrusleeping/cbor-gen"
···
41
var blockGetTotalCounterUsrskip = blockGetTotalCounter.WithLabelValues("true", "miss")
42
var blockGetTotalCounterCached = blockGetTotalCounter.WithLabelValues("false", "hit")
43
var blockGetTotalCounterNormal = blockGetTotalCounter.WithLabelValues("false", "miss")
44
-
45
-
var log = logging.Logger("carstore")
46
47
const MaxSliceLength = 2 << 20
48
···
67
68
lscLk sync.Mutex
69
lastShardCache map[models.Uid]*CarShard
70
}
71
72
func NewCarStore(meta *gorm.DB, roots []string) (CarStore, error) {
···
92
meta: &CarStoreGormMeta{meta: meta},
93
rootDirs: roots,
94
lastShardCache: make(map[models.Uid]*CarShard),
95
}, nil
96
}
97
···
883
if !os.IsNotExist(err) {
884
return err
885
}
886
-
log.Warnw("shard file we tried to delete did not exist", "shard", sh.ID, "path", sh.Path)
887
}
888
}
889
···
1034
st, err := os.Stat(sh.Path)
1035
if err != nil {
1036
if os.IsNotExist(err) {
1037
-
log.Warnw("missing shard, return size of zero", "path", sh.Path, "shard", sh.ID)
1038
return 0, nil
1039
}
1040
return 0, fmt.Errorf("stat %q: %w", sh.Path, err)
···
1155
// still around but we're doing that anyways since compaction isn't a
1156
// perfect process
1157
1158
-
log.Debugw("repo has dirty dupes", "count", len(dupes), "uid", user, "staleRefs", len(staleRefs), "blockRefs", len(brefs))
1159
1160
//return nil, fmt.Errorf("WIP: not currently handling this case")
1161
}
···
1350
}); err != nil {
1351
// If we ever fail to iterate a shard file because its
1352
// corrupted, just log an error and skip the shard
1353
-
log.Errorw("iterating blocks in shard", "shard", s.ID, "err", err, "uid", user)
1354
}
1355
}
1356
···
1368
_ = fi.Close()
1369
1370
if err2 := os.Remove(fi.Name()); err2 != nil {
1371
-
log.Errorf("failed to remove shard file (%s) after failed db transaction: %w", fi.Name(), err2)
1372
}
1373
1374
return err
···
6
"context"
7
"fmt"
8
"io"
9
+
"log/slog"
10
"os"
11
"path/filepath"
12
"sort"
···
25
cbor "github.com/ipfs/go-ipld-cbor"
26
ipld "github.com/ipfs/go-ipld-format"
27
"github.com/ipfs/go-libipfs/blocks"
28
car "github.com/ipld/go-car"
29
carutil "github.com/ipld/go-car/util"
30
cbg "github.com/whyrusleeping/cbor-gen"
···
41
var blockGetTotalCounterUsrskip = blockGetTotalCounter.WithLabelValues("true", "miss")
42
var blockGetTotalCounterCached = blockGetTotalCounter.WithLabelValues("false", "hit")
43
var blockGetTotalCounterNormal = blockGetTotalCounter.WithLabelValues("false", "miss")
44
45
const MaxSliceLength = 2 << 20
46
···
65
66
lscLk sync.Mutex
67
lastShardCache map[models.Uid]*CarShard
68
+
69
+
log *slog.Logger
70
}
71
72
func NewCarStore(meta *gorm.DB, roots []string) (CarStore, error) {
···
92
meta: &CarStoreGormMeta{meta: meta},
93
rootDirs: roots,
94
lastShardCache: make(map[models.Uid]*CarShard),
95
+
log: slog.Default().With("system", "carstore"),
96
}, nil
97
}
98
···
884
if !os.IsNotExist(err) {
885
return err
886
}
887
+
cs.log.Warn("shard file we tried to delete did not exist", "shard", sh.ID, "path", sh.Path)
888
}
889
}
890
···
1035
st, err := os.Stat(sh.Path)
1036
if err != nil {
1037
if os.IsNotExist(err) {
1038
+
slog.Warn("missing shard, return size of zero", "path", sh.Path, "shard", sh.ID, "system", "carstore")
1039
return 0, nil
1040
}
1041
return 0, fmt.Errorf("stat %q: %w", sh.Path, err)
···
1156
// still around but we're doing that anyways since compaction isn't a
1157
// perfect process
1158
1159
+
cs.log.Debug("repo has dirty dupes", "count", len(dupes), "uid", user, "staleRefs", len(staleRefs), "blockRefs", len(brefs))
1160
1161
//return nil, fmt.Errorf("WIP: not currently handling this case")
1162
}
···
1351
}); err != nil {
1352
// If we ever fail to iterate a shard file because its
1353
// corrupted, just log an error and skip the shard
1354
+
cs.log.Error("iterating blocks in shard", "shard", s.ID, "err", err, "uid", user)
1355
}
1356
}
1357
···
1369
_ = fi.Close()
1370
1371
if err2 := os.Remove(fi.Name()); err2 != nil {
1372
+
cs.log.Error("failed to remove shard file after failed db transaction", "path", fi.Name(), "err", err2)
1373
}
1374
1375
return err
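carstore takes a slightly different route from bgs.go: rather than a package-level logger it hangs a *slog.Logger off the store struct, tagged with system=carstore in NewCarStore. A sketch of that pattern, where the struct and method names are illustrative and only the field and the With() tagging mirror the diff:

package carstore

import "log/slog"

// store is a stand-in for the concrete CarStore implementation in this file.
type store struct {
	log *slog.Logger
}

func newStore() *store {
	return &store{
		// Matches the constructor hunk above: every line from this
		// component carries system=carstore.
		log: slog.Default().With("system", "carstore"),
	}
}

func (cs *store) reportMissingShard(path string, shardID uint) {
	// Methods log through the struct field rather than a package global.
	cs.log.Warn("missing shard, return size of zero", "path", path, "shard", shardID)
}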
+1
-1
cmd/beemo/firehose_consumer.go
+22
-17
cmd/bigsky/main.go
···
3
import (
4
"context"
5
"fmt"
6
"net/http"
7
_ "net/http/pprof"
8
"net/url"
···
30
_ "go.uber.org/automaxprocs"
31
32
"github.com/carlmjohnson/versioninfo"
33
-
logging "github.com/ipfs/go-log"
34
"github.com/urfave/cli/v2"
35
"go.opentelemetry.io/otel"
36
"go.opentelemetry.io/otel/attribute"
···
42
"gorm.io/plugin/opentelemetry/tracing"
43
)
44
45
-
var log = logging.Logger("bigsky")
46
47
func init() {
48
// control log level using, eg, GOLOG_LOG_LEVEL=debug
···
51
52
func main() {
53
if err := run(os.Args); err != nil {
54
-
log.Fatal(err)
55
}
56
}
57
···
255
// At a minimum, you need to set
256
// OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
257
if ep := cctx.String("otel-exporter-otlp-endpoint"); ep != "" {
258
-
log.Infow("setting up trace exporter", "endpoint", ep)
259
ctx, cancel := context.WithCancel(context.Background())
260
defer cancel()
261
262
exp, err := otlptracehttp.New(ctx)
263
if err != nil {
264
-
log.Fatalw("failed to create trace exporter", "error", err)
265
}
266
defer func() {
267
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
268
defer cancel()
269
if err := exp.Shutdown(ctx); err != nil {
270
-
log.Errorw("failed to shutdown trace exporter", "error", err)
271
}
272
}()
273
···
292
signals := make(chan os.Signal, 1)
293
signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
294
295
// start observability/tracing (OTEL and jaeger)
296
if err := setupOTEL(cctx); err != nil {
297
return err
···
304
return err
305
}
306
307
-
log.Infow("setting up main database")
308
dburl := cctx.String("db-url")
309
db, err := cliutil.SetupDatabase(dburl, cctx.Int("max-metadb-connections"))
310
if err != nil {
311
return err
312
}
313
314
-
log.Infow("setting up carstore database")
315
csdburl := cctx.String("carstore-db-url")
316
csdb, err := cliutil.SetupDatabase(csdburl, cctx.Int("max-carstore-connections"))
317
if err != nil {
···
378
var persister events.EventPersistence
379
380
if dpd := cctx.String("disk-persister-dir"); dpd != "" {
381
-
log.Infow("setting up disk persister")
382
383
pOpts := events.DefaultDiskPersistOptions()
384
pOpts.Retention = cctx.Duration("event-playback-ttl")
···
428
429
repoman.SetEventHandler(func(ctx context.Context, evt *repomgr.RepoEvent) {
430
if err := ix.HandleRepoEvent(ctx, evt); err != nil {
431
-
log.Errorw("failed to handle repo event", "err", err)
432
}
433
}, false)
434
···
452
}
453
}
454
455
-
log.Infow("constructing bgs")
456
bgsConfig := libbgs.DefaultBGSConfig()
457
bgsConfig.SSL = !cctx.Bool("crawl-insecure-ws")
458
bgsConfig.CompactInterval = cctx.Duration("compact-interval")
···
469
if err != nil {
470
return fmt.Errorf("failed to parse next-crawler url: %w", err)
471
}
472
-
log.Infow("configuring relay for requestCrawl", "host", nextCrawlerUrls[i])
473
}
474
bgsConfig.NextCrawlers = nextCrawlerUrls
475
}
···
487
// set up metrics endpoint
488
go func() {
489
if err := bgs.StartMetrics(cctx.String("metrics-listen")); err != nil {
490
-
log.Fatalf("failed to start metrics endpoint: %s", err)
491
}
492
}()
493
···
498
bgsErr <- err
499
}()
500
501
-
log.Infow("startup complete")
502
select {
503
case <-signals:
504
log.Info("received shutdown signal")
505
errs := bgs.Shutdown()
506
for err := range errs {
507
-
log.Errorw("error during BGS shutdown", "err", err)
508
}
509
case err := <-bgsErr:
510
if err != nil {
511
-
log.Errorw("error during BGS startup", "err", err)
512
}
513
log.Info("shutting down")
514
errs := bgs.Shutdown()
515
for err := range errs {
516
-
log.Errorw("error during BGS shutdown", "err", err)
517
}
518
}
519
···
3
import (
4
"context"
5
"fmt"
6
+
"log/slog"
7
"net/http"
8
_ "net/http/pprof"
9
"net/url"
···
31
_ "go.uber.org/automaxprocs"
32
33
"github.com/carlmjohnson/versioninfo"
34
"github.com/urfave/cli/v2"
35
"go.opentelemetry.io/otel"
36
"go.opentelemetry.io/otel/attribute"
···
42
"gorm.io/plugin/opentelemetry/tracing"
43
)
44
45
+
var log = slog.Default().With("system", "bigsky")
46
47
func init() {
48
// control log level using, eg, GOLOG_LOG_LEVEL=debug
···
51
52
func main() {
53
if err := run(os.Args); err != nil {
54
+
slog.Error(err.Error())
55
+
os.Exit(1)
56
}
57
}
58
···
256
// At a minimum, you need to set
257
// OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
258
if ep := cctx.String("otel-exporter-otlp-endpoint"); ep != "" {
259
+
slog.Info("setting up trace exporter", "endpoint", ep)
260
ctx, cancel := context.WithCancel(context.Background())
261
defer cancel()
262
263
exp, err := otlptracehttp.New(ctx)
264
if err != nil {
265
+
slog.Error("failed to create trace exporter", "error", err)
266
+
os.Exit(1)
267
}
268
defer func() {
269
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
270
defer cancel()
271
if err := exp.Shutdown(ctx); err != nil {
272
+
slog.Error("failed to shutdown trace exporter", "error", err)
273
}
274
}()
275
···
294
signals := make(chan os.Signal, 1)
295
signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
296
297
+
// TODO: set slog default from param/env
298
+
299
// start observability/tracing (OTEL and jaeger)
300
if err := setupOTEL(cctx); err != nil {
301
return err
···
308
return err
309
}
310
311
+
slog.Info("setting up main database")
312
dburl := cctx.String("db-url")
313
db, err := cliutil.SetupDatabase(dburl, cctx.Int("max-metadb-connections"))
314
if err != nil {
315
return err
316
}
317
318
+
slog.Info("setting up carstore database")
319
csdburl := cctx.String("carstore-db-url")
320
csdb, err := cliutil.SetupDatabase(csdburl, cctx.Int("max-carstore-connections"))
321
if err != nil {
···
382
var persister events.EventPersistence
383
384
if dpd := cctx.String("disk-persister-dir"); dpd != "" {
385
+
slog.Info("setting up disk persister")
386
387
pOpts := events.DefaultDiskPersistOptions()
388
pOpts.Retention = cctx.Duration("event-playback-ttl")
···
432
433
repoman.SetEventHandler(func(ctx context.Context, evt *repomgr.RepoEvent) {
434
if err := ix.HandleRepoEvent(ctx, evt); err != nil {
435
+
slog.Error("failed to handle repo event", "err", err)
436
}
437
}, false)
438
···
456
}
457
}
458
459
+
slog.Info("constructing bgs")
460
bgsConfig := libbgs.DefaultBGSConfig()
461
bgsConfig.SSL = !cctx.Bool("crawl-insecure-ws")
462
bgsConfig.CompactInterval = cctx.Duration("compact-interval")
···
473
if err != nil {
474
return fmt.Errorf("failed to parse next-crawler url: %w", err)
475
}
476
+
slog.Info("configuring relay for requestCrawl", "host", nextCrawlerUrls[i])
477
}
478
bgsConfig.NextCrawlers = nextCrawlerUrls
479
}
···
491
// set up metrics endpoint
492
go func() {
493
if err := bgs.StartMetrics(cctx.String("metrics-listen")); err != nil {
494
+
log.Error("failed to start metrics endpoint", "err", err)
495
+
os.Exit(1)
496
}
497
}()
498
···
503
bgsErr <- err
504
}()
505
506
+
slog.Info("startup complete")
507
select {
508
case <-signals:
509
log.Info("received shutdown signal")
510
errs := bgs.Shutdown()
511
for err := range errs {
512
+
slog.Error("error during BGS shutdown", "err", err)
513
}
514
case err := <-bgsErr:
515
if err != nil {
516
+
slog.Error("error during BGS startup", "err", err)
517
}
518
log.Info("shutting down")
519
errs := bgs.Shutdown()
520
for err := range errs {
521
+
slog.Error("error during BGS shutdown", "err", err)
522
}
523
}
524
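The bigsky and gosky mains above both carry a "TODO: set slog default from param/env" marker. A minimal sketch of what that wiring could look like, assuming the GOLOG_LOG_LEVEL name from the surviving init comment; none of this is part of the diff itself:

package main

import (
	"log/slog"
	"os"
)

// setDefaultLoggerFromEnv maps GOLOG_LOG_LEVEL onto a slog level and installs
// a process-wide default handler.
func setDefaultLoggerFromEnv() {
	lvl := slog.LevelInfo
	if s := os.Getenv("GOLOG_LOG_LEVEL"); s != "" {
		if err := lvl.UnmarshalText([]byte(s)); err != nil {
			lvl = slog.LevelInfo // unrecognized value, keep the info default
		}
	}
	slog.SetDefault(slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: lvl})))
}

func main() {
	// Caveat: package-level vars like `var log = slog.Default().With("system", "bigsky")`
	// capture the default handler during package init, before main runs, so they are
	// not retargeted by this call; loggers created afterwards are.
	setDefaultLoggerFromEnv()
	slog.Info("logging configured")
}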
+1
-1
cmd/goat/firehose.go
+2
-2
cmd/gosky/car.go
···
64
if topDir == "" {
65
topDir = did.String()
66
}
67
-
log.Infof("writing output to: %s", topDir)
68
69
commitPath := topDir + "/_commit"
70
os.MkdirAll(filepath.Dir(commitPath), os.ModePerm)
···
90
if err != nil {
91
return err
92
}
93
-
log.Debugf("processing record: %s", k)
94
95
// TODO: check if path is safe more carefully
96
recPath := topDir + "/" + k
···
64
if topDir == "" {
65
topDir = did.String()
66
}
67
+
log.Info("writing output", "topDir", topDir)
68
69
commitPath := topDir + "/_commit"
70
os.MkdirAll(filepath.Dir(commitPath), os.ModePerm)
···
90
if err != nil {
91
return err
92
}
93
+
log.Debug("processing record", "rec", k)
94
95
// TODO: check if path is safe more carefully
96
recPath := topDir + "/" + k
+15
-9
cmd/gosky/debug.go
···
106
}
107
108
seqScheduler := sequential.NewScheduler("debug-inspect-event", rsc.EventHandler)
109
-
err = events.HandleRepoStream(ctx, con, seqScheduler)
110
if err != errFoundIt {
111
return err
112
}
···
284
},
285
}
286
seqScheduler := sequential.NewScheduler("debug-stream", rsc.EventHandler)
287
-
err = events.HandleRepoStream(ctx, con, seqScheduler)
288
if err != nil {
289
return err
290
}
···
390
go func(i int, url string) {
391
con, _, err := d.Dial(url, http.Header{})
392
if err != nil {
393
-
log.Fatalf("Dial failure on url%d: %s", i+1, err)
394
}
395
396
ctx := context.TODO()
···
405
},
406
}
407
seqScheduler := sequential.NewScheduler(fmt.Sprintf("debug-stream-%d", i+1), rsc.EventHandler)
408
-
if err := events.HandleRepoStream(ctx, con, seqScheduler); err != nil {
409
-
log.Fatalf("HandleRepoStream failure on url%d: %s", i+1, err)
410
}
411
}(i, url)
412
}
···
876
logger := log.With("host", cctx.String("host-1"))
877
repo1bytes, err := comatproto.SyncGetRepo(ctx, &xrpc1, did.String(), "")
878
if err != nil {
879
-
logger.Fatalf("getting repo: %s", err)
880
return
881
}
882
883
rep1, err = repo.ReadRepoFromCar(ctx, bytes.NewReader(repo1bytes))
884
if err != nil {
885
-
logger.Fatalf("reading repo: %s", err)
886
return
887
}
888
}()
···
893
logger := log.With("host", cctx.String("host-2"))
894
repo2bytes, err := comatproto.SyncGetRepo(ctx, &xrpc2, did.String(), "")
895
if err != nil {
896
-
logger.Fatalf("getting repo: %s", err)
897
return
898
}
899
900
rep2, err = repo.ReadRepoFromCar(ctx, bytes.NewReader(repo2bytes))
901
if err != nil {
902
-
logger.Fatalf("reading repo: %s", err)
903
return
904
}
905
}()
···
106
}
107
108
seqScheduler := sequential.NewScheduler("debug-inspect-event", rsc.EventHandler)
109
+
err = events.HandleRepoStream(ctx, con, seqScheduler, nil)
110
if err != errFoundIt {
111
return err
112
}
···
284
},
285
}
286
seqScheduler := sequential.NewScheduler("debug-stream", rsc.EventHandler)
287
+
err = events.HandleRepoStream(ctx, con, seqScheduler, nil)
288
if err != nil {
289
return err
290
}
···
390
go func(i int, url string) {
391
con, _, err := d.Dial(url, http.Header{})
392
if err != nil {
393
+
log.Error("Dial failure", "i", i, "url", url, "err", err)
394
+
os.Exit(1)
395
}
396
397
ctx := context.TODO()
···
406
},
407
}
408
seqScheduler := sequential.NewScheduler(fmt.Sprintf("debug-stream-%d", i+1), rsc.EventHandler)
409
+
if err := events.HandleRepoStream(ctx, con, seqScheduler, nil); err != nil {
410
+
log.Error("HandleRepoStream failure", "i", i, "url", url, "err", err)
411
+
os.Exit(1)
412
}
413
}(i, url)
414
}
···
878
logger := log.With("host", cctx.String("host-1"))
879
repo1bytes, err := comatproto.SyncGetRepo(ctx, &xrpc1, did.String(), "")
880
if err != nil {
881
+
logger.Error("getting repo", "err", err)
882
+
os.Exit(1)
883
return
884
}
885
886
rep1, err = repo.ReadRepoFromCar(ctx, bytes.NewReader(repo1bytes))
887
if err != nil {
888
+
logger.Error("reading repo", "err", err)
889
+
os.Exit(1)
890
return
891
}
892
}()
···
897
logger := log.With("host", cctx.String("host-2"))
898
repo2bytes, err := comatproto.SyncGetRepo(ctx, &xrpc2, did.String(), "")
899
if err != nil {
900
+
logger.Error("getting repo", "err", err)
901
+
os.Exit(1)
902
return
903
}
904
905
rep2, err = repo.ReadRepoFromCar(ctx, bytes.NewReader(repo2bytes))
906
if err != nil {
907
+
logger.Error("reading repo", "err", err)
908
+
os.Exit(1)
909
return
910
}
911
}()
+6
-3
cmd/gosky/main.go
···
7
"encoding/json"
8
"fmt"
9
"io"
10
"net/http"
11
"os"
12
"os/signal"
···
39
_ "github.com/joho/godotenv/autoload"
40
41
"github.com/carlmjohnson/versioninfo"
42
-
logging "github.com/ipfs/go-log"
43
"github.com/polydawn/refmt/cbor"
44
rejson "github.com/polydawn/refmt/json"
45
"github.com/polydawn/refmt/shared"
46
cli "github.com/urfave/cli/v2"
47
)
48
49
-
var log = logging.Logger("gosky")
50
51
func main() {
52
run(os.Args)
···
80
EnvVars: []string{"ATP_PLC_HOST"},
81
},
82
}
83
app.Commands = []*cli.Command{
84
accountCmd,
85
adminCmd,
···
339
},
340
}
341
seqScheduler := sequential.NewScheduler(con.RemoteAddr().String(), rsc.EventHandler)
342
-
return events.HandleRepoStream(ctx, con, seqScheduler)
343
},
344
}
345
···
7
"encoding/json"
8
"fmt"
9
"io"
10
+
"log/slog"
11
"net/http"
12
"os"
13
"os/signal"
···
40
_ "github.com/joho/godotenv/autoload"
41
42
"github.com/carlmjohnson/versioninfo"
43
"github.com/polydawn/refmt/cbor"
44
rejson "github.com/polydawn/refmt/json"
45
"github.com/polydawn/refmt/shared"
46
cli "github.com/urfave/cli/v2"
47
)
48
49
+
var log = slog.Default().With("system", "gosky")
50
51
func main() {
52
run(os.Args)
···
80
EnvVars: []string{"ATP_PLC_HOST"},
81
},
82
}
83
+
84
+
// TODO: slog.SetDefault from param/env
85
+
86
app.Commands = []*cli.Command{
87
accountCmd,
88
adminCmd,
···
342
},
343
}
344
seqScheduler := sequential.NewScheduler(con.RemoteAddr().String(), rsc.EventHandler)
345
+
return events.HandleRepoStream(ctx, con, seqScheduler, log)
346
},
347
}
348
+4
-4
cmd/gosky/streamdiff.go
···
58
},
59
}
60
seqScheduler := sequential.NewScheduler("streamA", rsc.EventHandler)
61
-
err = events.HandleRepoStream(ctx, cona, seqScheduler)
62
if err != nil {
63
-
log.Errorf("stream A failed: %s", err)
64
}
65
}()
66
···
82
}
83
84
seqScheduler := sequential.NewScheduler("streamB", rsc.EventHandler)
85
-
err = events.HandleRepoStream(ctx, conb, seqScheduler)
86
if err != nil {
87
-
log.Errorf("stream B failed: %s", err)
88
}
89
}()
90
···
58
},
59
}
60
seqScheduler := sequential.NewScheduler("streamA", rsc.EventHandler)
61
+
err = events.HandleRepoStream(ctx, cona, seqScheduler, log)
62
if err != nil {
63
+
log.Error("stream A failed", "err", err)
64
}
65
}()
66
···
82
}
83
84
seqScheduler := sequential.NewScheduler("streamB", rsc.EventHandler)
85
+
err = events.HandleRepoStream(ctx, conb, seqScheduler, log)
86
if err != nil {
87
+
log.Error("stream B failed", "err", err)
88
}
89
}()
90
+1
-1
cmd/gosky/sync.go
-3
cmd/laputa/main.go
···
14
_ "go.uber.org/automaxprocs"
15
16
"github.com/carlmjohnson/versioninfo"
17
-
logging "github.com/ipfs/go-log"
18
"github.com/urfave/cli/v2"
19
"go.opentelemetry.io/otel"
20
"go.opentelemetry.io/otel/attribute"
···
24
semconv "go.opentelemetry.io/otel/semconv/v1.4.0"
25
"gorm.io/plugin/opentelemetry/tracing"
26
)
27
-
28
-
var log = logging.Logger("laputa")
29
30
func main() {
31
run(os.Args)
···
14
_ "go.uber.org/automaxprocs"
15
16
"github.com/carlmjohnson/versioninfo"
17
"github.com/urfave/cli/v2"
18
"go.opentelemetry.io/otel"
19
"go.opentelemetry.io/otel/attribute"
···
23
semconv "go.opentelemetry.io/otel/semconv/v1.4.0"
24
"gorm.io/plugin/opentelemetry/tracing"
25
)
26
27
func main() {
28
run(os.Args)
+20
-14
cmd/rainbow/main.go
···
3
import (
4
"context"
5
"github.com/bluesky-social/indigo/events"
6
_ "net/http/pprof"
7
"os"
8
"os/signal"
···
15
_ "go.uber.org/automaxprocs"
16
17
"github.com/carlmjohnson/versioninfo"
18
-
logging "github.com/ipfs/go-log"
19
"github.com/urfave/cli/v2"
20
"go.opentelemetry.io/otel"
21
"go.opentelemetry.io/otel/attribute"
···
25
semconv "go.opentelemetry.io/otel/semconv/v1.4.0"
26
)
27
28
-
var log = logging.Logger("rainbow")
29
30
func init() {
31
// control log level using, eg, GOLOG_LOG_LEVEL=debug
32
-
logging.SetAllLoggers(logging.LevelDebug)
33
}
34
35
func main() {
···
90
},
91
}
92
93
app.Action = Splitter
94
err := app.Run(os.Args)
95
if err != nil {
96
-
log.Fatal(err)
97
}
98
}
99
···
108
// At a minimum, you need to set
109
// OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
110
if ep := os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT"); ep != "" {
111
-
log.Infow("setting up trace exporter", "endpoint", ep)
112
ctx, cancel := context.WithCancel(context.Background())
113
defer cancel()
114
115
exp, err := otlptracehttp.New(ctx)
116
if err != nil {
117
-
log.Fatalw("failed to create trace exporter", "error", err)
118
}
119
defer func() {
120
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
121
defer cancel()
122
if err := exp.Shutdown(ctx); err != nil {
123
-
log.Errorw("failed to shutdown trace exporter", "error", err)
124
}
125
}()
126
···
142
var spl *splitter.Splitter
143
var err error
144
if persistPath != "" {
145
-
log.Infof("building splitter with storage at: %s", persistPath)
146
ppopts := events.PebblePersistOptions{
147
DbPath: persistPath,
148
PersistDuration: time.Duration(float64(time.Hour) * cctx.Float64("persist-hours")),
···
164
spl, err = splitter.NewSplitter(conf)
165
}
166
if err != nil {
167
-
log.Fatalw("failed to create splitter", "path", persistPath, "error", err)
168
return err
169
}
170
171
// set up metrics endpoint
172
go func() {
173
if err := spl.StartMetrics(cctx.String("metrics-listen")); err != nil {
174
-
log.Fatalf("failed to start metrics endpoint: %s", err)
175
}
176
}()
177
···
182
runErr <- err
183
}()
184
185
-
log.Infow("startup complete")
186
select {
187
case <-signals:
188
log.Info("received shutdown signal")
189
if err := spl.Shutdown(); err != nil {
190
-
log.Errorw("error during Splitter shutdown", "err", err)
191
}
192
case err := <-runErr:
193
if err != nil {
194
-
log.Errorw("error during Splitter startup", "err", err)
195
}
196
log.Info("shutting down")
197
if err := spl.Shutdown(); err != nil {
198
-
log.Errorw("error during Splitter shutdown", "err", err)
199
}
200
}
201
···
3
import (
4
"context"
5
"github.com/bluesky-social/indigo/events"
6
+
"log/slog"
7
_ "net/http/pprof"
8
"os"
9
"os/signal"
···
16
_ "go.uber.org/automaxprocs"
17
18
"github.com/carlmjohnson/versioninfo"
19
"github.com/urfave/cli/v2"
20
"go.opentelemetry.io/otel"
21
"go.opentelemetry.io/otel/attribute"
···
25
semconv "go.opentelemetry.io/otel/semconv/v1.4.0"
26
)
27
28
+
var log = slog.Default().With("system", "rainbow")
29
30
func init() {
31
// control log level using, eg, GOLOG_LOG_LEVEL=debug
32
+
//logging.SetAllLoggers(logging.LevelDebug)
33
}
34
35
func main() {
···
90
},
91
}
92
93
+
// TODO: slog.SetDefault and set module `var log *slog.Logger` based on flags and env
94
+
95
app.Action = Splitter
96
err := app.Run(os.Args)
97
if err != nil {
98
+
log.Error(err.Error())
99
+
os.Exit(1)
100
}
101
}
102
···
111
// At a minimum, you need to set
112
// OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
113
if ep := os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT"); ep != "" {
114
+
log.Info("setting up trace exporter", "endpoint", ep)
115
ctx, cancel := context.WithCancel(context.Background())
116
defer cancel()
117
118
exp, err := otlptracehttp.New(ctx)
119
if err != nil {
120
+
log.Error("failed to create trace exporter", "error", err)
121
+
os.Exit(1)
122
}
123
defer func() {
124
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
125
defer cancel()
126
if err := exp.Shutdown(ctx); err != nil {
127
+
log.Error("failed to shutdown trace exporter", "error", err)
128
}
129
}()
130
···
146
var spl *splitter.Splitter
147
var err error
148
if persistPath != "" {
149
+
log.Info("building splitter with storage at", "path", persistPath)
150
ppopts := events.PebblePersistOptions{
151
DbPath: persistPath,
152
PersistDuration: time.Duration(float64(time.Hour) * cctx.Float64("persist-hours")),
···
168
spl, err = splitter.NewSplitter(conf)
169
}
170
if err != nil {
171
+
log.Error("failed to create splitter", "path", persistPath, "error", err)
172
+
os.Exit(1)
173
return err
174
}
175
176
// set up metrics endpoint
177
go func() {
178
if err := spl.StartMetrics(cctx.String("metrics-listen")); err != nil {
179
+
log.Error("failed to start metrics endpoint", "err", err)
180
+
os.Exit(1)
181
}
182
}()
183
···
188
runErr <- err
189
}()
190
191
+
log.Info("startup complete")
192
select {
193
case <-signals:
194
log.Info("received shutdown signal")
195
if err := spl.Shutdown(); err != nil {
196
+
log.Error("error during Splitter shutdown", "err", err)
197
}
198
case err := <-runErr:
199
if err != nil {
200
+
log.Error("error during Splitter startup", "err", err)
201
}
202
log.Info("shutting down")
203
if err := spl.Shutdown(); err != nil {
204
+
log.Error("error during Splitter shutdown", "err", err)
205
}
206
}
207
+1
-1
cmd/sonar/main.go
-3
cmd/stress/main.go
+18
-10
events/consumer.go
···
4
"context"
5
"fmt"
6
"io"
7
"net"
8
"time"
9
···
108
return n, err
109
}
110
111
-
func HandleRepoStream(ctx context.Context, con *websocket.Conn, sched Scheduler) error {
112
ctx, cancel := context.WithCancel(ctx)
113
defer cancel()
114
defer sched.Shutdown()
···
124
select {
125
case <-t.C:
126
if err := con.WriteControl(websocket.PingMessage, []byte{}, time.Now().Add(time.Second*10)); err != nil {
127
-
log.Warnf("failed to ping: %s", err)
128
}
129
case <-ctx.Done():
130
con.Close()
···
145
146
con.SetPongHandler(func(_ string) error {
147
if err := con.SetReadDeadline(time.Now().Add(time.Minute)); err != nil {
148
-
log.Errorf("failed to set read deadline: %s", err)
149
}
150
151
return nil
···
194
}
195
196
if evt.Seq < lastSeq {
197
-
log.Errorf("Got events out of order from stream (seq = %d, prev = %d)", evt.Seq, lastSeq)
198
}
199
200
lastSeq = evt.Seq
···
211
}
212
213
if evt.Seq < lastSeq {
214
-
log.Errorf("Got events out of order from stream (seq = %d, prev = %d)", evt.Seq, lastSeq)
215
}
216
lastSeq = evt.Seq
217
···
227
}
228
229
if evt.Seq < lastSeq {
230
-
log.Errorf("Got events out of order from stream (seq = %d, prev = %d)", evt.Seq, lastSeq)
231
}
232
lastSeq = evt.Seq
233
···
243
}
244
245
if evt.Seq < lastSeq {
246
-
log.Errorf("Got events out of order from stream (seq = %d, prev = %d)", evt.Seq, lastSeq)
247
}
248
lastSeq = evt.Seq
249
···
271
}
272
273
if evt.Seq < lastSeq {
274
-
log.Errorf("Got events out of order from stream (seq = %d, prev = %d)", evt.Seq, lastSeq)
275
}
276
lastSeq = evt.Seq
277
···
287
}
288
289
if evt.Seq < lastSeq {
290
-
log.Errorf("Got events out of order from stream (seq = %d, prev = %d)", evt.Seq, lastSeq)
291
}
292
lastSeq = evt.Seq
293
···
303
}
304
305
if evt.Seq < lastSeq {
306
-
log.Errorf("Got events out of order from stream (seq = %d, prev = %d)", evt.Seq, lastSeq)
307
}
308
309
lastSeq = evt.Seq
···
4
"context"
5
"fmt"
6
"io"
7
+
"log/slog"
8
"net"
9
"time"
10
···
109
return n, err
110
}
111
112
+
// HandleRepoStream
113
+
// con is source of events
114
+
// sched gets AddWork for each event
115
+
// log may be nil for default logger
116
+
func HandleRepoStream(ctx context.Context, con *websocket.Conn, sched Scheduler, log *slog.Logger) error {
117
+
if log == nil {
118
+
log = slog.Default().With("system", "events")
119
+
}
120
ctx, cancel := context.WithCancel(ctx)
121
defer cancel()
122
defer sched.Shutdown()
···
132
select {
133
case <-t.C:
134
if err := con.WriteControl(websocket.PingMessage, []byte{}, time.Now().Add(time.Second*10)); err != nil {
135
+
log.Warn("failed to ping", "err", err)
136
}
137
case <-ctx.Done():
138
con.Close()
···
153
154
con.SetPongHandler(func(_ string) error {
155
if err := con.SetReadDeadline(time.Now().Add(time.Minute)); err != nil {
156
+
log.Error("failed to set read deadline", "err", err)
157
}
158
159
return nil
···
202
}
203
204
if evt.Seq < lastSeq {
205
+
log.Error("Got events out of order from stream", "seq", evt.Seq, "prev", lastSeq)
206
}
207
208
lastSeq = evt.Seq
···
219
}
220
221
if evt.Seq < lastSeq {
222
+
log.Error("Got events out of order from stream", "seq", evt.Seq, "prev", lastSeq)
223
}
224
lastSeq = evt.Seq
225
···
235
}
236
237
if evt.Seq < lastSeq {
238
+
log.Error("Got events out of order from stream", "seq", evt.Seq, "prev", lastSeq)
239
}
240
lastSeq = evt.Seq
241
···
251
}
252
253
if evt.Seq < lastSeq {
254
+
log.Error("Got events out of order from stream", "seq", evt.Seq, "prev", lastSeq)
255
}
256
lastSeq = evt.Seq
257
···
279
}
280
281
if evt.Seq < lastSeq {
282
+
log.Error("Got events out of order from stream", "seq", evt.Seq, "prev", lastSeq)
283
}
284
lastSeq = evt.Seq
285
···
295
}
296
297
if evt.Seq < lastSeq {
298
+
log.Error("Got events out of order from stream", "seq", evt.Seq, "prev", lastSeq)
299
}
300
lastSeq = evt.Seq
301
···
311
}
312
313
if evt.Seq < lastSeq {
314
+
log.Error("Got events out of order from stream", "seq", evt.Seq, "prev", lastSeq)
315
}
316
317
lastSeq = evt.Seq
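The HandleRepoStream signature change above is what the rest of this diff adapts to: callers pass a *slog.Logger, and nil falls back to slog.Default().With("system", "events"). A hedged caller-side sketch; the relay URL and scheduler ident are placeholders, and the sequential scheduler signature follows the calls shown earlier in this diff:

package main

import (
	"context"
	"log/slog"
	"net/http"

	"github.com/bluesky-social/indigo/events"
	"github.com/bluesky-social/indigo/events/schedulers/sequential"
	"github.com/gorilla/websocket"
)

// consumeFirehose dials a repo stream and feeds events to a sequential
// scheduler, handing HandleRepoStream an explicitly scoped logger.
func consumeFirehose(ctx context.Context, url string) error {
	con, _, err := websocket.DefaultDialer.Dial(url, http.Header{})
	if err != nil {
		return err
	}
	handle := func(ctx context.Context, evt *events.XRPCStreamEvent) error {
		if evt.RepoCommit != nil {
			slog.Info("commit", "repo", evt.RepoCommit.Repo, "seq", evt.RepoCommit.Seq)
		}
		return nil
	}
	sched := sequential.NewScheduler("example-consumer", handle)
	logger := slog.Default().With("system", "example-consumer")
	// Passing nil instead of logger selects slog.Default().With("system", "events").
	return events.HandleRepoStream(ctx, con, sched, logger)
}

func main() {
	// Placeholder relay host; substitute a real firehose endpoint.
	url := "wss://relay.example.com/xrpc/com.atproto.sync.subscribeRepos"
	if err := consumeFirehose(context.Background(), url); err != nil {
		slog.Error("stream ended", "err", err)
	}
}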
+2
-2
events/dbpersist.go
···
131
132
if needsFlush {
133
if err := p.Flush(context.Background()); err != nil {
134
-
log.Errorf("failed to flush batch: %s", err)
135
}
136
}
137
}
···
323
func (p *DbPersistence) RecordFromRepoCommit(ctx context.Context, evt *comatproto.SyncSubscribeRepos_Commit) (*RepoEventRecord, error) {
324
// TODO: hack hack hack
325
if len(evt.Ops) > 8192 {
326
-
log.Errorf("(VERY BAD) truncating ops field in outgoing event (len = %d)", len(evt.Ops))
327
evt.Ops = evt.Ops[:8192]
328
}
329
···
131
132
if needsFlush {
133
if err := p.Flush(context.Background()); err != nil {
134
+
log.Error("failed to flush batch", "err", err)
135
}
136
}
137
}
···
323
func (p *DbPersistence) RecordFromRepoCommit(ctx context.Context, evt *comatproto.SyncSubscribeRepos_Commit) (*RepoEventRecord, error) {
324
// TODO: hack hack hack
325
if len(evt.Ops) > 8192 {
326
+
log.Error("(VERY BAD) truncating ops field in outgoing event", "len", len(evt.Ops))
327
evt.Ops = evt.Ops[:8192]
328
}
329
-5
events/dbpersist_test.go
···
16
pds "github.com/bluesky-social/indigo/pds/data"
17
"github.com/bluesky-social/indigo/repomgr"
18
"github.com/bluesky-social/indigo/util"
19
-
logging "github.com/ipfs/go-log/v2"
20
"gorm.io/driver/sqlite"
21
"gorm.io/gorm"
22
)
23
-
24
-
func init() {
25
-
logging.SetAllLoggers(logging.LevelDebug)
26
-
}
27
28
func BenchmarkDBPersist(b *testing.B) {
29
ctx := context.Background()
+5
-5
events/diskpersist.go
···
312
dp.lk.Lock()
313
if err := dp.flushLog(ctx); err != nil {
314
// TODO: this happening is quite bad. Need a recovery strategy
315
-
log.Errorf("failed to flush disk log: %s", err)
316
}
317
dp.lk.Unlock()
318
}
···
354
case <-t.C:
355
if errs := dp.garbageCollect(ctx); len(errs) > 0 {
356
for _, err := range errs {
357
-
log.Errorf("garbage collection error: %s", err)
358
}
359
}
360
}
···
430
refsGarbageCollected.WithLabelValues().Add(float64(refsDeleted))
431
filesGarbageCollected.WithLabelValues().Add(float64(filesDeleted))
432
433
-
log.Infow("garbage collection complete",
434
"filesDeleted", filesDeleted,
435
"refsDeleted", refsDeleted,
436
"oldRefsFound", oldRefsFound,
···
696
return nil, err
697
}
698
if since > lastSeq {
699
-
log.Errorw("playback cursor is greater than last seq of file checked",
700
"since", since,
701
"lastSeq", lastSeq,
702
"filename", fn,
···
778
return nil, err
779
}
780
default:
781
-
log.Warnw("unrecognized event kind coming from log file", "seq", h.Seq, "kind", h.Kind)
782
return nil, fmt.Errorf("halting on unrecognized event kind")
783
}
784
}
···
312
dp.lk.Lock()
313
if err := dp.flushLog(ctx); err != nil {
314
// TODO: this happening is quite bad. Need a recovery strategy
315
+
log.Error("failed to flush disk log", "err", err)
316
}
317
dp.lk.Unlock()
318
}
···
354
case <-t.C:
355
if errs := dp.garbageCollect(ctx); len(errs) > 0 {
356
for _, err := range errs {
357
+
log.Error("garbage collection error", "err", err)
358
}
359
}
360
}
···
430
refsGarbageCollected.WithLabelValues().Add(float64(refsDeleted))
431
filesGarbageCollected.WithLabelValues().Add(float64(filesDeleted))
432
433
+
log.Info("garbage collection complete",
434
"filesDeleted", filesDeleted,
435
"refsDeleted", refsDeleted,
436
"oldRefsFound", oldRefsFound,
···
696
return nil, err
697
}
698
if since > lastSeq {
699
+
log.Error("playback cursor is greater than last seq of file checked",
700
"since", since,
701
"lastSeq", lastSeq,
702
"filename", fn,
···
778
return nil, err
779
}
780
default:
781
+
log.Warn("unrecognized event kind coming from log file", "seq", h.Seq, "kind", h.Kind)
782
return nil, fmt.Errorf("halting on unrecognized event kind")
783
}
784
}
+12
-9
events/events.go
···
6
"errors"
7
"fmt"
8
"io"
9
"sync"
10
"time"
11
···
14
"github.com/bluesky-social/indigo/models"
15
"github.com/prometheus/client_golang/prometheus"
16
17
-
logging "github.com/ipfs/go-log"
18
cbg "github.com/whyrusleeping/cbor-gen"
19
"go.opentelemetry.io/otel"
20
)
21
22
-
var log = logging.Logger("events")
23
24
type Scheduler interface {
25
AddWork(ctx context.Context, repo string, val *XRPCStreamEvent) error
···
34
crossoverBufferSize int
35
36
persister EventPersistence
37
}
38
39
func NewEventManager(persister EventPersistence) *EventManager {
···
41
bufferSize: 16 << 10,
42
crossoverBufferSize: 512,
43
persister: persister,
44
}
45
46
persister.SetEventBroadcaster(em.broadcastEvent)
···
67
func (em *EventManager) broadcastEvent(evt *XRPCStreamEvent) {
68
// the main thing we do is send it out, so MarshalCBOR once
69
if err := evt.Preserialize(); err != nil {
70
-
log.Errorf("broadcast serialize failed, %s", err)
71
// serialize isn't going to go better later, this event is cursed
72
return
73
}
···
93
// code
94
s.filter = func(*XRPCStreamEvent) bool { return false }
95
96
-
log.Warnw("dropping slow consumer due to event overflow", "bufferSize", len(s.outgoing), "ident", s.ident)
97
go func(torem *Subscriber) {
98
torem.lk.Lock()
99
if !torem.cleanedUp {
···
104
},
105
}:
106
case <-time.After(time.Second * 5):
107
-
log.Warnw("failed to send error frame to backed up consumer", "ident", torem.ident)
108
}
109
}
110
torem.lk.Unlock()
···
121
// accept a uid. The lookup inside the persister is notably expensive (despite
122
// being an lru cache?)
123
if err := em.persister.Persist(ctx, evt); err != nil {
124
-
log.Errorf("failed to persist outbound event: %s", err)
125
}
126
}
127
···
370
}
371
}); err != nil {
372
if errors.Is(err, ErrPlaybackShutdown) {
373
-
log.Warnf("events playback: %s", err)
374
} else {
375
-
log.Errorf("events playback: %s", err)
376
}
377
378
// TODO: send an error frame or something?
···
400
}
401
}); err != nil {
402
if !errors.Is(err, ErrCaughtUp) {
403
-
log.Errorf("events playback: %s", err)
404
405
// TODO: send an error frame or something?
406
close(out)
···
6
"errors"
7
"fmt"
8
"io"
9
+
"log/slog"
10
"sync"
11
"time"
12
···
15
"github.com/bluesky-social/indigo/models"
16
"github.com/prometheus/client_golang/prometheus"
17
18
cbg "github.com/whyrusleeping/cbor-gen"
19
"go.opentelemetry.io/otel"
20
)
21
22
+
var log = slog.Default().With("system", "events")
23
24
type Scheduler interface {
25
AddWork(ctx context.Context, repo string, val *XRPCStreamEvent) error
···
34
crossoverBufferSize int
35
36
persister EventPersistence
37
+
38
+
log *slog.Logger
39
}
40
41
func NewEventManager(persister EventPersistence) *EventManager {
···
43
bufferSize: 16 << 10,
44
crossoverBufferSize: 512,
45
persister: persister,
46
+
log: slog.Default().With("system", "events"),
47
}
48
49
persister.SetEventBroadcaster(em.broadcastEvent)
···
70
func (em *EventManager) broadcastEvent(evt *XRPCStreamEvent) {
71
// the main thing we do is send it out, so MarshalCBOR once
72
if err := evt.Preserialize(); err != nil {
73
+
em.log.Error("broadcast serialize failed", "err", err)
74
// serialize isn't going to go better later, this event is cursed
75
return
76
}
···
96
// code
97
s.filter = func(*XRPCStreamEvent) bool { return false }
98
99
+
em.log.Warn("dropping slow consumer due to event overflow", "bufferSize", len(s.outgoing), "ident", s.ident)
100
go func(torem *Subscriber) {
101
torem.lk.Lock()
102
if !torem.cleanedUp {
···
107
},
108
}:
109
case <-time.After(time.Second * 5):
110
+
em.log.Warn("failed to send error frame to backed up consumer", "ident", torem.ident)
111
}
112
}
113
torem.lk.Unlock()
···
124
// accept a uid. The lookup inside the persister is notably expensive (despite
125
// being an lru cache?)
126
if err := em.persister.Persist(ctx, evt); err != nil {
127
+
em.log.Error("failed to persist outbound event", "err", err)
128
}
129
}
130
···
373
}
374
}); err != nil {
375
if errors.Is(err, ErrPlaybackShutdown) {
376
+
em.log.Warn("events playback", "err", err)
377
} else {
378
+
em.log.Error("events playback", "err", err)
379
}
380
381
// TODO: send an error frame or something?
···
403
}
404
}); err != nil {
405
if !errors.Is(err, ErrCaughtUp) {
406
+
em.log.Error("events playback", "err", err)
407
408
// TODO: send an error frame or something?
409
close(out)
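One subtlety in the events package change above: the package-level log var is fixed when the package initializes, while the EventManager's own em.log snapshots slog.Default() at construction time. A minimal ordering sketch, with the handler choice purely illustrative:

package example

import (
	"log/slog"
	"os"

	"github.com/bluesky-social/indigo/events"
)

// newEventManager installs the preferred default handler before construction,
// since NewEventManager copies slog.Default() into em.log at that moment. The
// package-level `var log` in events.go was already captured at package init
// and is not retargeted here.
func newEventManager(persister events.EventPersistence) *events.EventManager {
	slog.SetDefault(slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo})))
	return events.NewEventManager(persister)
}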
+6
-6
events/pebblepersist.go
···
193
case <-ticker.C:
194
err := pp.GarbageCollect(ctx)
195
if err != nil {
196
-
log.Errorw("GC err", "err", err)
197
}
198
case <-ctx.Done():
199
return
···
239
sizeBefore, _ := pp.db.EstimateDiskUsage(zeroKey[:], ffffKey[:])
240
if seq == -1 {
241
// nothing to delete
242
-
log.Infow("pebble gc nop", "size", sizeBefore)
243
return nil
244
}
245
var key [16]byte
246
setKeySeqMillis(key[:], seq, lastKeyTime)
247
-
log.Infow("pebble gc start", "to", hex.EncodeToString(key[:]))
248
err = pp.db.DeleteRange(zeroKey[:], key[:], pebble.Sync)
249
if err != nil {
250
return err
251
}
252
sizeAfter, _ := pp.db.EstimateDiskUsage(zeroKey[:], ffffKey[:])
253
-
log.Infow("pebble gc", "before", sizeBefore, "after", sizeAfter)
254
start := time.Now()
255
err = pp.db.Compact(zeroKey[:], key[:], true)
256
if err != nil {
257
-
log.Warnw("pebble gc compact", "err", err)
258
}
259
dt := time.Since(start)
260
-
log.Infow("pebble gc compact ok", "dt", dt)
261
return nil
262
}
···
193
case <-ticker.C:
194
err := pp.GarbageCollect(ctx)
195
if err != nil {
196
+
log.Error("GC err", "err", err)
197
}
198
case <-ctx.Done():
199
return
···
239
sizeBefore, _ := pp.db.EstimateDiskUsage(zeroKey[:], ffffKey[:])
240
if seq == -1 {
241
// nothing to delete
242
+
log.Info("pebble gc nop", "size", sizeBefore)
243
return nil
244
}
245
var key [16]byte
246
setKeySeqMillis(key[:], seq, lastKeyTime)
247
+
log.Info("pebble gc start", "to", hex.EncodeToString(key[:]))
248
err = pp.db.DeleteRange(zeroKey[:], key[:], pebble.Sync)
249
if err != nil {
250
return err
251
}
252
sizeAfter, _ := pp.db.EstimateDiskUsage(zeroKey[:], ffffKey[:])
253
+
log.Info("pebble gc", "before", sizeBefore, "after", sizeAfter)
254
start := time.Now()
255
err = pp.db.Compact(zeroKey[:], key[:], true)
256
if err != nil {
257
+
log.Warn("pebble gc compact", "err", err)
258
}
259
dt := time.Since(start)
260
+
log.Info("pebble gc compact ok", "dt", dt)
261
return nil
262
}
+14
-12
events/schedulers/autoscaling/autoscaling.go
···
2
3
import (
4
"context"
5
"sync"
6
"time"
7
8
"github.com/bluesky-social/indigo/events"
9
"github.com/bluesky-social/indigo/events/schedulers"
10
-
logging "github.com/ipfs/go-log"
11
"github.com/prometheus/client_golang/prometheus"
12
)
13
-
14
-
var log = logging.Logger("autoscaling-scheduler")
15
16
// Scheduler is a scheduler that will scale up and down the number of workers based on the throughput of the workers.
17
type Scheduler struct {
···
40
autoscaleFrequency time.Duration
41
autoscalerIn chan struct{}
42
autoscalerOut chan struct{}
43
}
44
45
type AutoscaleSettings struct {
···
99
autoscaleFrequency: autoscaleSettings.AutoscaleFrequency,
100
autoscalerIn: make(chan struct{}),
101
autoscalerOut: make(chan struct{}),
102
}
103
104
for i := 0; i < p.concurrency; i++ {
···
111
}
112
113
func (p *Scheduler) Shutdown() {
114
-
log.Debugf("shutting down autoscaling scheduler for %s", p.ident)
115
116
// stop autoscaling
117
p.autoscalerIn <- struct{}{}
118
close(p.autoscalerIn)
119
<-p.autoscalerOut
120
121
-
log.Debug("stopping autoscaling scheduler workers")
122
// stop workers
123
for i := 0; i < p.concurrency; i++ {
124
p.feeder <- &consumerTask{signal: "stop"}
125
}
126
close(p.feeder)
127
128
-
log.Debug("waiting for autoscaling scheduler workers to stop")
129
130
p.workerGroup.Wait()
131
132
-
log.Debug("stopping autoscaling scheduler throughput manager")
133
p.throughputManager.Stop()
134
135
-
log.Debug("autoscaling scheduler shutdown complete")
136
}
137
138
// Add autoscaling function
···
197
}
198
199
func (p *Scheduler) worker() {
200
-
log.Debugf("starting autoscaling worker for %s", p.ident)
201
p.workersActive.Inc()
202
p.workerGroup.Add(1)
203
defer p.workerGroup.Done()
···
205
for work != nil {
206
// Check if the work item contains a signal to stop the worker.
207
if work.signal == "stop" {
208
-
log.Debugf("stopping autoscaling worker for %s", p.ident)
209
p.workersActive.Dec()
210
return
211
}
212
213
p.itemsActive.Inc()
214
if err := p.do(context.TODO(), work.val); err != nil {
215
-
log.Errorf("event handler failed: %s", err)
216
}
217
p.itemsProcessed.Inc()
218
219
p.lk.Lock()
220
rem, ok := p.active[work.repo]
221
if !ok {
222
-
log.Errorf("should always have an 'active' entry if a worker is processing a job")
223
}
224
225
if len(rem) == 0 {
···
2
3
import (
4
"context"
5
+
"log/slog"
6
"sync"
7
"time"
8
9
"github.com/bluesky-social/indigo/events"
10
"github.com/bluesky-social/indigo/events/schedulers"
11
"github.com/prometheus/client_golang/prometheus"
12
)
13
14
// Scheduler is a scheduler that will scale up and down the number of workers based on the throughput of the workers.
15
type Scheduler struct {
···
38
autoscaleFrequency time.Duration
39
autoscalerIn chan struct{}
40
autoscalerOut chan struct{}
41
+
42
+
log *slog.Logger
43
}
44
45
type AutoscaleSettings struct {
···
99
autoscaleFrequency: autoscaleSettings.AutoscaleFrequency,
100
autoscalerIn: make(chan struct{}),
101
autoscalerOut: make(chan struct{}),
102
+
103
+
log: slog.Default().With("system", "autoscaling-scheduler"),
104
}
105
106
for i := 0; i < p.concurrency; i++ {
···
113
}
114
115
func (p *Scheduler) Shutdown() {
116
+
p.log.Debug("shutting down autoscaling scheduler", "ident", p.ident)
117
118
// stop autoscaling
119
p.autoscalerIn <- struct{}{}
120
close(p.autoscalerIn)
121
<-p.autoscalerOut
122
123
+
p.log.Debug("stopping autoscaling scheduler workers")
124
// stop workers
125
for i := 0; i < p.concurrency; i++ {
126
p.feeder <- &consumerTask{signal: "stop"}
127
}
128
close(p.feeder)
129
130
+
p.log.Debug("waiting for autoscaling scheduler workers to stop")
131
132
p.workerGroup.Wait()
133
134
+
p.log.Debug("stopping autoscaling scheduler throughput manager")
135
p.throughputManager.Stop()
136
137
+
p.log.Debug("autoscaling scheduler shutdown complete")
138
}
139
140
// Add autoscaling function
···
199
}
200
201
func (p *Scheduler) worker() {
202
+
p.log.Debug("starting autoscaling worker", "ident", p.ident)
203
p.workersActive.Inc()
204
p.workerGroup.Add(1)
205
defer p.workerGroup.Done()
···
207
for work != nil {
208
// Check if the work item contains a signal to stop the worker.
209
if work.signal == "stop" {
210
+
p.log.Debug("stopping autoscaling worker", "ident", p.ident)
211
p.workersActive.Dec()
212
return
213
}
214
215
p.itemsActive.Inc()
216
if err := p.do(context.TODO(), work.val); err != nil {
217
+
p.log.Error("event handler failed", "err", err)
218
}
219
p.itemsProcessed.Inc()
220
221
p.lk.Lock()
222
rem, ok := p.active[work.repo]
223
if !ok {
224
+
p.log.Error("should always have an 'active' entry if a worker is processing a job")
225
}
226
227
if len(rem) == 0 {
+9
-7
events/schedulers/parallel/parallel.go
···
2
3
import (
4
"context"
5
"sync"
6
7
"github.com/bluesky-social/indigo/events"
8
"github.com/bluesky-social/indigo/events/schedulers"
9
-
logging "github.com/ipfs/go-log"
10
11
"github.com/prometheus/client_golang/prometheus"
12
)
13
-
14
-
var log = logging.Logger("parallel-scheduler")
15
16
// Scheduler is a parallel scheduler that will run work on a fixed number of workers
17
type Scheduler struct {
···
33
itemsProcessed prometheus.Counter
34
itemsActive prometheus.Counter
35
workesActive prometheus.Gauge
36
}
37
38
func NewScheduler(maxC, maxQ int, ident string, do func(context.Context, *events.XRPCStreamEvent) error) *Scheduler {
···
52
itemsProcessed: schedulers.WorkItemsProcessed.WithLabelValues(ident, "parallel"),
53
itemsActive: schedulers.WorkItemsActive.WithLabelValues(ident, "parallel"),
54
workesActive: schedulers.WorkersActive.WithLabelValues(ident, "parallel"),
55
}
56
57
for i := 0; i < maxC; i++ {
···
64
}
65
66
func (p *Scheduler) Shutdown() {
67
-
log.Infof("shutting down parallel scheduler for %s", p.ident)
68
69
for i := 0; i < p.maxConcurrency; i++ {
70
p.feeder <- &consumerTask{
···
78
<-p.out
79
}
80
81
-
log.Info("parallel scheduler shutdown complete")
82
}
83
84
type consumerTask struct {
···
123
124
p.itemsActive.Inc()
125
if err := p.do(context.TODO(), work.val); err != nil {
126
-
log.Errorf("event handler failed: %s", err)
127
}
128
p.itemsProcessed.Inc()
129
130
p.lk.Lock()
131
rem, ok := p.active[work.repo]
132
if !ok {
133
-
log.Errorf("should always have an 'active' entry if a worker is processing a job")
134
}
135
136
if len(rem) == 0 {
···
2
3
import (
4
"context"
5
+
"log/slog"
6
"sync"
7
8
"github.com/bluesky-social/indigo/events"
9
"github.com/bluesky-social/indigo/events/schedulers"
10
11
"github.com/prometheus/client_golang/prometheus"
12
)
13
14
// Scheduler is a parallel scheduler that will run work on a fixed number of workers
15
type Scheduler struct {
···
31
itemsProcessed prometheus.Counter
32
itemsActive prometheus.Counter
33
workesActive prometheus.Gauge
34
+
35
+
log *slog.Logger
36
}
37
38
func NewScheduler(maxC, maxQ int, ident string, do func(context.Context, *events.XRPCStreamEvent) error) *Scheduler {
···
52
itemsProcessed: schedulers.WorkItemsProcessed.WithLabelValues(ident, "parallel"),
53
itemsActive: schedulers.WorkItemsActive.WithLabelValues(ident, "parallel"),
54
workesActive: schedulers.WorkersActive.WithLabelValues(ident, "parallel"),
55
+
56
+
log: slog.Default().With("system", "parallel-scheduler"),
57
}
58
59
for i := 0; i < maxC; i++ {
···
66
}
67
68
func (p *Scheduler) Shutdown() {
69
+
p.log.Info("shutting down parallel scheduler", "ident", p.ident)
70
71
for i := 0; i < p.maxConcurrency; i++ {
72
p.feeder <- &consumerTask{
···
80
<-p.out
81
}
82
83
+
p.log.Info("parallel scheduler shutdown complete")
84
}
85
86
type consumerTask struct {
···
125
126
p.itemsActive.Inc()
127
if err := p.do(context.TODO(), work.val); err != nil {
128
+
p.log.Error("event handler failed", "err", err)
129
}
130
p.itemsProcessed.Inc()
131
132
p.lk.Lock()
133
rem, ok := p.active[work.repo]
134
if !ok {
135
+
p.log.Error("should always have an 'active' entry if a worker is processing a job")
136
}
137
138
if len(rem) == 0 {
+1
-3
events/schedulers/sequential/sequential.go
···
2
3
import (
4
"context"
5
-
6
"github.com/bluesky-social/indigo/events"
7
"github.com/bluesky-social/indigo/events/schedulers"
8
-
logging "github.com/ipfs/go-log"
9
"github.com/prometheus/client_golang/prometheus"
10
)
11
12
-
var log = logging.Logger("sequential-scheduler")
13
14
// Scheduler is a sequential scheduler that will run work on a single worker
15
type Scheduler struct {
···
2
3
import (
4
"context"
5
"github.com/bluesky-social/indigo/events"
6
"github.com/bluesky-social/indigo/events/schedulers"
7
"github.com/prometheus/client_golang/prometheus"
8
)
9
10
+
// var log = slog.Default().With("system", "sequential-scheduler")
11
12
// Scheduler is a sequential scheduler that will run work on a single worker
13
type Scheduler struct {
+1
-1
fakedata/accounts.go
+7
-3
fakedata/generators.go
···
7
"bytes"
8
"context"
9
"fmt"
10
"math/rand"
11
"time"
12
···
16
"github.com/bluesky-social/indigo/xrpc"
17
18
"github.com/brianvoe/gofakeit/v6"
19
-
logging "github.com/ipfs/go-log"
20
)
21
22
-
var log = logging.Logger("fakedata")
23
24
func MeasureIterations(name string) func(int) {
25
start := time.Now()
···
28
return
29
}
30
total := time.Since(start)
31
-
log.Infof("%s wall runtime: count=%d total=%s mean=%s", name, count, total, total/time.Duration(count))
32
}
33
}
34
···
7
"bytes"
8
"context"
9
"fmt"
10
+
"log/slog"
11
"math/rand"
12
"time"
13
···
17
"github.com/bluesky-social/indigo/xrpc"
18
19
"github.com/brianvoe/gofakeit/v6"
20
)
21
22
+
var log = slog.Default().With("system", "fakedata")
23
+
24
+
func SetLogger(logger *slog.Logger) {
25
+
log = logger
26
+
}
27
28
func MeasureIterations(name string) func(int) {
29
start := time.Now()
···
32
return
33
}
34
total := time.Since(start)
35
+
log.Info("wall runtime", "name", name, "count", count, "total", total, "rate", total/time.Duration(count))
36
}
37
}
38
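fakedata now exposes SetLogger, so an embedding command can route the package logger through its own handler instead of the default captured at init. A small hedged usage sketch built on the MeasureIterations signature shown above:

package main

import (
	"log/slog"
	"os"

	"github.com/bluesky-social/indigo/fakedata"
)

func main() {
	// Swap in a caller-scoped logger; without this call the package keeps the
	// slog.Default()-derived logger it captured at init.
	fakedata.SetLogger(slog.New(slog.NewTextHandler(os.Stderr, nil)).With("system", "fakedata"))

	done := fakedata.MeasureIterations("demo loop")
	done(100) // logs the "wall runtime" line with count, total, and per-iteration time
}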
+2
-2
go.mod
···
31
github.com/ipfs/go-ipld-cbor v0.1.0
32
github.com/ipfs/go-ipld-format v0.6.0
33
github.com/ipfs/go-libipfs v0.7.0
34
-
github.com/ipfs/go-log v1.0.5
35
-
github.com/ipfs/go-log/v2 v2.5.1
36
github.com/ipld/go-car v0.6.1-0.20230509095817-92d28eb23ba4
37
github.com/ipld/go-car/v2 v2.13.1
38
github.com/jackc/pgx/v5 v5.5.0
···
90
github.com/go-redis/redis v6.15.9+incompatible // indirect
91
github.com/golang/snappy v0.0.4 // indirect
92
github.com/hashicorp/golang-lru v1.0.2 // indirect
93
github.com/jackc/puddle/v2 v2.2.1 // indirect
94
github.com/klauspost/compress v1.17.3 // indirect
95
github.com/kr/pretty v0.3.1 // indirect
···
31
github.com/ipfs/go-ipld-cbor v0.1.0
32
github.com/ipfs/go-ipld-format v0.6.0
33
github.com/ipfs/go-libipfs v0.7.0
34
github.com/ipld/go-car v0.6.1-0.20230509095817-92d28eb23ba4
35
github.com/ipld/go-car/v2 v2.13.1
36
github.com/jackc/pgx/v5 v5.5.0
···
88
github.com/go-redis/redis v6.15.9+incompatible // indirect
89
github.com/golang/snappy v0.0.4 // indirect
90
github.com/hashicorp/golang-lru v1.0.2 // indirect
91
+
github.com/ipfs/go-log v1.0.5 // indirect
92
+
github.com/ipfs/go-log/v2 v2.5.1 // indirect
93
github.com/jackc/puddle/v2 v2.2.1 // indirect
94
github.com/klauspost/compress v1.17.3 // indirect
95
github.com/kr/pretty v0.3.1 // indirect
+6
-2
indexer/crawler.go
···
3
import (
4
"context"
5
"fmt"
6
"sync"
7
"time"
8
···
29
30
concurrency int
31
32
done chan struct{}
33
}
34
35
-
func NewCrawlDispatcher(repoFn func(context.Context, *crawlWork) error, concurrency int) (*CrawlDispatcher, error) {
36
if concurrency < 1 {
37
return nil, fmt.Errorf("must specify a non-zero positive integer for crawl dispatcher concurrency")
38
}
···
46
concurrency: concurrency,
47
todo: make(map[models.Uid]*crawlWork),
48
inProgress: make(map[models.Uid]*crawlWork),
49
done: make(chan struct{}),
50
}
51
go out.CatchupRepoGaugePoller()
···
218
select {
219
case job := <-c.repoSync:
220
if err := c.doRepoCrawl(context.TODO(), job); err != nil {
221
-
log.Errorf("failed to perform repo crawl of %q: %s", job.act.Did, err)
222
}
223
224
// TODO: do we still just do this if it errors?
···
3
import (
4
"context"
5
"fmt"
6
+
"log/slog"
7
"sync"
8
"time"
9
···
30
31
concurrency int
32
33
+
log *slog.Logger
34
+
35
done chan struct{}
36
}
37
38
+
func NewCrawlDispatcher(repoFn func(context.Context, *crawlWork) error, concurrency int, log *slog.Logger) (*CrawlDispatcher, error) {
39
if concurrency < 1 {
40
return nil, fmt.Errorf("must specify a non-zero positive integer for crawl dispatcher concurrency")
41
}
···
49
concurrency: concurrency,
50
todo: make(map[models.Uid]*crawlWork),
51
inProgress: make(map[models.Uid]*crawlWork),
52
+
log: log,
53
done: make(chan struct{}),
54
}
55
go out.CatchupRepoGaugePoller()
···
222
select {
223
case job := <-c.repoSync:
224
if err := c.doRepoCrawl(context.TODO(), job); err != nil {
225
+
c.log.Error("failed to perform repo crawl", "did", job.act.Did, "err", err)
226
}
227
228
// TODO: do we still just do this if it errors?
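CrawlDispatcher here, and Indexer and KeyManager below, all gain a struct-level *slog.Logger that is either injected through the constructor (the new third argument to NewCrawlDispatcher) or defaulted to a system-scoped slog.Default(). A generic hedged sketch of that pattern; the Worker names are illustrative and not from the repo:

package example

import "log/slog"

// Worker mirrors the constructor-injected logger pattern this diff applies to
// CrawlDispatcher: the caller supplies a scoped *slog.Logger.
type Worker struct {
	log *slog.Logger
}

// NewWorker falls back to a system-scoped default when nil is passed, the same
// nil handling HandleRepoStream uses; NewCrawlDispatcher itself expects the
// caller (the Indexer) to hand in its own logger.
func NewWorker(log *slog.Logger) *Worker {
	if log == nil {
		log = slog.Default().With("system", "worker")
	}
	return &Worker{log: log}
}

func (w *Worker) Run() {
	w.log.Debug("worker started")
}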
+31
-30
indexer/indexer.go
···
5
"database/sql"
6
"errors"
7
"fmt"
8
"time"
9
10
comatproto "github.com/bluesky-social/indigo/api/atproto"
···
19
"github.com/bluesky-social/indigo/xrpc"
20
21
"github.com/ipfs/go-cid"
22
-
logging "github.com/ipfs/go-log"
23
"go.opentelemetry.io/otel"
24
"gorm.io/gorm"
25
"gorm.io/gorm/clause"
26
)
27
-
28
-
var log = logging.Logger("indexer")
29
30
const MaxEventSliceLength = 1000000
31
const MaxOpsSliceLength = 200
···
45
SendRemoteFollow func(context.Context, string, uint) error
46
CreateExternalUser func(context.Context, string) (*models.ActorInfo, error)
47
ApplyPDSClientSettings func(*xrpc.Client)
48
}
49
50
func NewIndexer(db *gorm.DB, notifman notifs.NotificationManager, evtman *events.EventManager, didr did.Resolver, fetcher *RepoFetcher, crawl, aggregate, spider bool) (*Indexer, error) {
···
65
return nil
66
},
67
ApplyPDSClientSettings: func(*xrpc.Client) {},
68
}
69
70
if crawl {
71
-
c, err := NewCrawlDispatcher(fetcher.FetchAndIndexRepo, fetcher.MaxConcurrency)
72
if err != nil {
73
return nil, err
74
}
···
90
ctx, span := otel.Tracer("indexer").Start(ctx, "HandleRepoEvent")
91
defer span.End()
92
93
-
log.Debugw("Handling Repo Event!", "uid", evt.User)
94
95
outops := make([]*comatproto.SyncSubscribeRepos_RepoOp, 0, len(evt.Ops))
96
for _, op := range evt.Ops {
···
102
})
103
104
if err := ix.handleRepoOp(ctx, evt, &op); err != nil {
105
-
log.Errorw("failed to handle repo op", "err", err)
106
}
107
}
108
···
119
toobig = true
120
}
121
122
-
log.Debugw("Sending event", "did", did)
123
if err := ix.events.AddEvent(ctx, &events.XRPCStreamEvent{
124
RepoCommit: &comatproto.SyncSubscribeRepos_Commit{
125
Repo: did,
···
197
if e.Type == "mention" {
198
_, err := ix.GetUserOrMissing(ctx, e.Value)
199
if err != nil {
200
-
log.Infow("failed to parse user mention", "ref", e.Value, "err", err)
201
}
202
}
203
}
···
205
if rec.Reply != nil {
206
if rec.Reply.Parent != nil {
207
if err := ix.crawlAtUriRef(ctx, rec.Reply.Parent.Uri); err != nil {
208
-
log.Infow("failed to crawl reply parent", "cid", op.RecCid, "replyuri", rec.Reply.Parent.Uri, "err", err)
209
}
210
}
211
212
if rec.Reply.Root != nil {
213
if err := ix.crawlAtUriRef(ctx, rec.Reply.Root.Uri); err != nil {
214
-
log.Infow("failed to crawl reply root", "cid", op.RecCid, "rooturi", rec.Reply.Root.Uri, "err", err)
215
}
216
}
217
}
···
220
case *bsky.FeedRepost:
221
if rec.Subject != nil {
222
if err := ix.crawlAtUriRef(ctx, rec.Subject.Uri); err != nil {
223
-
log.Infow("failed to crawl repost subject", "cid", op.RecCid, "subjecturi", rec.Subject.Uri, "err", err)
224
}
225
}
226
return nil
227
case *bsky.FeedLike:
228
if rec.Subject != nil {
229
if err := ix.crawlAtUriRef(ctx, rec.Subject.Uri); err != nil {
230
-
log.Infow("failed to crawl like subject", "cid", op.RecCid, "subjecturi", rec.Subject.Uri, "err", err)
231
}
232
}
233
return nil
234
case *bsky.GraphFollow:
235
_, err := ix.GetUserOrMissing(ctx, rec.Subject)
236
if err != nil {
237
-
log.Infow("failed to crawl follow subject", "cid", op.RecCid, "subjectdid", rec.Subject, "err", err)
238
}
239
return nil
240
case *bsky.GraphBlock:
241
_, err := ix.GetUserOrMissing(ctx, rec.Subject)
242
if err != nil {
243
-
log.Infow("failed to crawl follow subject", "cid", op.RecCid, "subjectdid", rec.Subject, "err", err)
244
}
245
return nil
246
case *bsky.ActorProfile:
···
252
case *bsky.FeedGenerator:
253
return nil
254
default:
255
-
log.Warnw("unrecognized record type (crawling references)", "record", op.Record, "collection", op.Collection)
256
return nil
257
}
258
}
···
293
}
294
295
func (ix *Indexer) addUserToCrawler(ctx context.Context, ai *models.ActorInfo) error {
296
-
log.Debugw("Sending user to crawler: ", "did", ai.Did)
297
if ix.Crawler == nil {
298
return nil
299
}
···
395
}
396
397
func (ix *Indexer) handleRecordDelete(ctx context.Context, evt *repomgr.RepoEvent, op *repomgr.RepoOp, local bool) error {
398
-
log.Debugw("record delete event", "collection", op.Collection)
399
400
switch op.Collection {
401
case "app.bsky.feed.post":
···
411
fp, err := ix.GetPost(ctx, uri)
412
if err != nil {
413
if errors.Is(err, gorm.ErrRecordNotFound) {
414
-
log.Warnw("deleting post weve never seen before. Weird.", "user", evt.User, "rkey", op.Rkey)
415
return nil
416
}
417
return err
···
425
return err
426
}
427
428
-
log.Warn("TODO: remove notifications on delete")
429
/*
430
if err := ix.notifman.RemoveRepost(ctx, fp.Author, rr.ID, evt.User); err != nil {
431
return nil, err
···
466
return err
467
}
468
469
-
log.Warnf("need to delete vote notification")
470
return nil
471
}
472
···
477
}
478
479
if q.RowsAffected == 0 {
480
-
log.Warnw("attempted to delete follow we did not have a record for", "user", evt.User, "rkey", op.Rkey)
481
return nil
482
}
483
···
485
}
486
487
func (ix *Indexer) handleRecordCreate(ctx context.Context, evt *repomgr.RepoEvent, op *repomgr.RepoOp, local bool) ([]uint, error) {
488
-
log.Debugw("record create event", "collection", op.Collection)
489
490
var out []uint
491
switch rec := op.Record.(type) {
···
535
case *bsky.FeedGenerator:
536
return out, nil
537
case *bsky.ActorProfile:
538
-
log.Debugf("TODO: got actor profile record creation, need to do something with this")
539
default:
540
return nil, fmt.Errorf("unrecognized record type (creation): %s", op.Collection)
541
}
···
609
}
610
611
func (ix *Indexer) handleRecordUpdate(ctx context.Context, evt *repomgr.RepoEvent, op *repomgr.RepoOp, local bool) error {
612
-
log.Debugw("record update event", "collection", op.Collection)
613
614
switch rec := op.Record.(type) {
615
case *bsky.FeedPost:
···
629
630
if oldReply != newReply {
631
// the 'replyness' of the post was changed... that's weird
632
-
log.Errorf("need to properly handle case where reply-ness of posts is changed")
633
return nil
634
}
635
···
640
}
641
642
if replyto.ID != fp.ReplyTo {
643
-
log.Errorf("post was changed to be a reply to a different post")
644
return nil
645
}
646
}
···
693
694
return ix.handleRecordCreateGraphFollow(ctx, rec, evt, op)
695
case *bsky.ActorProfile:
696
-
log.Debugf("TODO: got actor profile record update, need to do something with this")
697
default:
698
return fmt.Errorf("unrecognized record type (update): %s", op.Collection)
699
}
···
767
// we're likely filling in a missing reference
768
if !maybe.Missing {
769
// TODO: we've already processed this record creation
770
-
log.Warnw("potentially erroneous event, duplicate create", "rkey", rkey, "user", user)
771
}
772
773
if err := ix.db.Clauses(clause.OnConflict{
···
791
}
792
793
func (ix *Indexer) createMissingPostRecord(ctx context.Context, puri *util.ParsedUri) (*models.FeedPost, error) {
794
-
log.Warn("creating missing post record")
795
ai, err := ix.GetUserOrMissing(ctx, puri.Did)
796
if err != nil {
797
return nil, err
···
813
if post.Reply != nil {
814
replyto, err := ix.GetPost(ctx, post.Reply.Parent.Uri)
815
if err != nil {
816
-
log.Error("probably shouldn't error when processing a reply to a not-found post")
817
return err
818
}
819
···
5
"database/sql"
6
"errors"
7
"fmt"
8
+
"log/slog"
9
"time"
10
11
comatproto "github.com/bluesky-social/indigo/api/atproto"
···
20
"github.com/bluesky-social/indigo/xrpc"
21
22
"github.com/ipfs/go-cid"
23
"go.opentelemetry.io/otel"
24
"gorm.io/gorm"
25
"gorm.io/gorm/clause"
26
)
27
28
const MaxEventSliceLength = 1000000
29
const MaxOpsSliceLength = 200
···
43
SendRemoteFollow func(context.Context, string, uint) error
44
CreateExternalUser func(context.Context, string) (*models.ActorInfo, error)
45
ApplyPDSClientSettings func(*xrpc.Client)
46
+
47
+
log *slog.Logger
48
}
49
50
func NewIndexer(db *gorm.DB, notifman notifs.NotificationManager, evtman *events.EventManager, didr did.Resolver, fetcher *RepoFetcher, crawl, aggregate, spider bool) (*Indexer, error) {
···
65
return nil
66
},
67
ApplyPDSClientSettings: func(*xrpc.Client) {},
68
+
log: slog.Default().With("system", "indexer"),
69
}
70
71
if crawl {
72
+
c, err := NewCrawlDispatcher(fetcher.FetchAndIndexRepo, fetcher.MaxConcurrency, ix.log)
73
if err != nil {
74
return nil, err
75
}
···
91
ctx, span := otel.Tracer("indexer").Start(ctx, "HandleRepoEvent")
92
defer span.End()
93
94
+
ix.log.Debug("Handling Repo Event!", "uid", evt.User)
95
96
outops := make([]*comatproto.SyncSubscribeRepos_RepoOp, 0, len(evt.Ops))
97
for _, op := range evt.Ops {
···
103
})
104
105
if err := ix.handleRepoOp(ctx, evt, &op); err != nil {
106
+
ix.log.Error("failed to handle repo op", "err", err)
107
}
108
}
109
···
120
toobig = true
121
}
122
123
+
ix.log.Debug("Sending event", "did", did)
124
if err := ix.events.AddEvent(ctx, &events.XRPCStreamEvent{
125
RepoCommit: &comatproto.SyncSubscribeRepos_Commit{
126
Repo: did,
···
198
if e.Type == "mention" {
199
_, err := ix.GetUserOrMissing(ctx, e.Value)
200
if err != nil {
201
+
ix.log.Info("failed to parse user mention", "ref", e.Value, "err", err)
202
}
203
}
204
}
···
206
if rec.Reply != nil {
207
if rec.Reply.Parent != nil {
208
if err := ix.crawlAtUriRef(ctx, rec.Reply.Parent.Uri); err != nil {
209
+
ix.log.Info("failed to crawl reply parent", "cid", op.RecCid, "replyuri", rec.Reply.Parent.Uri, "err", err)
210
}
211
}
212
213
if rec.Reply.Root != nil {
214
if err := ix.crawlAtUriRef(ctx, rec.Reply.Root.Uri); err != nil {
215
+
ix.log.Info("failed to crawl reply root", "cid", op.RecCid, "rooturi", rec.Reply.Root.Uri, "err", err)
216
}
217
}
218
}
···
221
case *bsky.FeedRepost:
222
if rec.Subject != nil {
223
if err := ix.crawlAtUriRef(ctx, rec.Subject.Uri); err != nil {
224
+
ix.log.Info("failed to crawl repost subject", "cid", op.RecCid, "subjecturi", rec.Subject.Uri, "err", err)
225
}
226
}
227
return nil
228
case *bsky.FeedLike:
229
if rec.Subject != nil {
230
if err := ix.crawlAtUriRef(ctx, rec.Subject.Uri); err != nil {
231
+
ix.log.Info("failed to crawl like subject", "cid", op.RecCid, "subjecturi", rec.Subject.Uri, "err", err)
232
}
233
}
234
return nil
235
case *bsky.GraphFollow:
236
_, err := ix.GetUserOrMissing(ctx, rec.Subject)
237
if err != nil {
238
+
ix.log.Info("failed to crawl follow subject", "cid", op.RecCid, "subjectdid", rec.Subject, "err", err)
239
}
240
return nil
241
case *bsky.GraphBlock:
242
_, err := ix.GetUserOrMissing(ctx, rec.Subject)
243
if err != nil {
244
+
ix.log.Info("failed to crawl follow subject", "cid", op.RecCid, "subjectdid", rec.Subject, "err", err)
245
}
246
return nil
247
case *bsky.ActorProfile:
···
253
case *bsky.FeedGenerator:
254
return nil
255
default:
256
+
ix.log.Warn("unrecognized record type (crawling references)", "record", op.Record, "collection", op.Collection)
257
return nil
258
}
259
}
···
294
}
295
296
func (ix *Indexer) addUserToCrawler(ctx context.Context, ai *models.ActorInfo) error {
297
+
ix.log.Debug("Sending user to crawler: ", "did", ai.Did)
298
if ix.Crawler == nil {
299
return nil
300
}
···
396
}
397
398
func (ix *Indexer) handleRecordDelete(ctx context.Context, evt *repomgr.RepoEvent, op *repomgr.RepoOp, local bool) error {
399
+
ix.log.Debug("record delete event", "collection", op.Collection)
400
401
switch op.Collection {
402
case "app.bsky.feed.post":
···
412
fp, err := ix.GetPost(ctx, uri)
413
if err != nil {
414
if errors.Is(err, gorm.ErrRecordNotFound) {
415
+
ix.log.Warn("deleting post weve never seen before. Weird.", "user", evt.User, "rkey", op.Rkey)
416
return nil
417
}
418
return err
···
426
return err
427
}
428
429
+
ix.log.Warn("TODO: remove notifications on delete")
430
/*
431
if err := ix.notifman.RemoveRepost(ctx, fp.Author, rr.ID, evt.User); err != nil {
432
return nil, err
···
467
return err
468
}
469
470
+
ix.log.Warn("need to delete vote notification")
471
return nil
472
}
473
···
478
}
479
480
if q.RowsAffected == 0 {
481
+
ix.log.Warn("attempted to delete follow we did not have a record for", "user", evt.User, "rkey", op.Rkey)
482
return nil
483
}
484
···
486
}
487
488
func (ix *Indexer) handleRecordCreate(ctx context.Context, evt *repomgr.RepoEvent, op *repomgr.RepoOp, local bool) ([]uint, error) {
489
+
ix.log.Debug("record create event", "collection", op.Collection)
490
491
var out []uint
492
switch rec := op.Record.(type) {
···
536
case *bsky.FeedGenerator:
537
return out, nil
538
case *bsky.ActorProfile:
539
+
ix.log.Debug("TODO: got actor profile record creation, need to do something with this")
540
default:
541
return nil, fmt.Errorf("unrecognized record type (creation): %s", op.Collection)
542
}
···
610
}
611
612
func (ix *Indexer) handleRecordUpdate(ctx context.Context, evt *repomgr.RepoEvent, op *repomgr.RepoOp, local bool) error {
613
+
ix.log.Debug("record update event", "collection", op.Collection)
614
615
switch rec := op.Record.(type) {
616
case *bsky.FeedPost:
···
630
631
if oldReply != newReply {
632
// the 'replyness' of the post was changed... that's weird
633
+
ix.log.Error("need to properly handle case where reply-ness of posts is changed")
634
return nil
635
}
636
···
641
}
642
643
if replyto.ID != fp.ReplyTo {
644
+
ix.log.Error("post was changed to be a reply to a different post")
645
return nil
646
}
647
}
···
694
695
return ix.handleRecordCreateGraphFollow(ctx, rec, evt, op)
696
case *bsky.ActorProfile:
697
+
ix.log.Debug("TODO: got actor profile record update, need to do something with this")
698
default:
699
return fmt.Errorf("unrecognized record type (update): %s", op.Collection)
700
}
···
768
// we're likely filling in a missing reference
769
if !maybe.Missing {
770
// TODO: we've already processed this record creation
771
+
ix.log.Warn("potentially erroneous event, duplicate create", "rkey", rkey, "user", user)
772
}
773
774
if err := ix.db.Clauses(clause.OnConflict{
···
792
}
793
794
func (ix *Indexer) createMissingPostRecord(ctx context.Context, puri *util.ParsedUri) (*models.FeedPost, error) {
795
+
ix.log.Warn("creating missing post record")
796
ai, err := ix.GetUserOrMissing(ctx, puri.Did)
797
if err != nil {
798
return nil, err
···
814
if post.Reply != nil {
815
replyto, err := ix.GetPost(ctx, post.Reply.Parent.Uri)
816
if err != nil {
817
+
ix.log.Error("probably shouldn't error when processing a reply to a not-found post")
818
return err
819
}
820
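The indexer hunks above all make the same substitution: package-level go-log calls (Infow/Warnf/Errorw) become calls on a per-struct *slog.Logger, written as a constant message followed by alternating key/value pairs. A minimal, self-contained sketch of that call pattern, using a simplified stand-in for the Indexer type rather than the real one:

package main

import (
	"errors"
	"log/slog"
)

// Indexer is a stripped-down stand-in; it only carries a logger.
type Indexer struct {
	log *slog.Logger
}

func (ix *Indexer) crawlRef(uri string) {
	err := errors.New("simulated crawl failure")
	// structured form: a constant message, then alternating key/value pairs,
	// replacing the old printf-style Infof/Warnf calls
	ix.log.Info("failed to crawl reply parent", "replyuri", uri, "err", err)
}

func main() {
	ix := &Indexer{log: slog.Default().With("system", "indexer")}
	ix.crawlRef("at://did:example/app.bsky.feed.post/abc")
}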
+5

-1
indexer/keymgr.go
···
3
import (
4
"context"
5
"fmt"
6
7
did "github.com/whyrusleeping/go-did"
8
"go.opentelemetry.io/otel"
···
12
didr DidResolver
13
14
signingKey *did.PrivKey
15
}
16
17
type DidResolver interface {
···
22
return &KeyManager{
23
didr: didr,
24
signingKey: k,
25
}
26
}
27
···
36
37
err = k.Verify(msg, sig)
38
if err != nil {
39
-
log.Warnw("signature failed to verify", "err", err, "did", did, "pubKey", k, "sigBytes", sig, "msgBytes", msg)
40
}
41
return err
42
}
···
3
import (
4
"context"
5
"fmt"
6
+
"log/slog"
7
8
did "github.com/whyrusleeping/go-did"
9
"go.opentelemetry.io/otel"
···
13
didr DidResolver
14
15
signingKey *did.PrivKey
16
+
17
+
log *slog.Logger
18
}
19
20
type DidResolver interface {
···
25
return &KeyManager{
26
didr: didr,
27
signingKey: k,
28
+
log: slog.Default().With("system", "indexer"),
29
}
30
}
31
···
40
41
err = k.Verify(msg, sig)
42
if err != nil {
43
+
km.log.Warn("signature failed to verify", "err", err, "did", did, "pubKey", k, "sigBytes", sig, "msgBytes", msg)
44
}
45
return err
46
}
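The keymgr.go hunk shows the other half of the pattern used across these files: the struct gains an unexported log *slog.Logger field and the constructor seeds it with slog.Default().With("system", ...), so every message the component emits carries a system attribute. A minimal sketch of that shape (the type and constructor names here are illustrative, not the real ones):

package main

import "log/slog"

type SimpleKeyManager struct {
	log *slog.Logger
}

func NewSimpleKeyManager() *SimpleKeyManager {
	return &SimpleKeyManager{
		// every log line from this component is tagged with system=indexer
		log: slog.Default().With("system", "indexer"),
	}
}

func main() {
	km := NewSimpleKeyManager()
	km.log.Warn("signature failed to verify", "did", "did:example:alice")
}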
+7
-3
indexer/repofetch.go
···
7
"fmt"
8
"io"
9
"io/fs"
10
"sync"
11
12
"github.com/bluesky-social/indigo/api/atproto"
···
27
Limiters: make(map[uint]*rate.Limiter),
28
ApplyPDSClientSettings: func(*xrpc.Client) {},
29
MaxConcurrency: maxConcurrency,
30
}
31
}
32
···
40
MaxConcurrency int
41
42
ApplyPDSClientSettings func(*xrpc.Client)
43
}
44
45
func (rf *RepoFetcher) GetLimiter(pdsID uint) *rate.Limiter {
···
84
// Wait to prevent DOSing the PDS when connecting to a new stream with lots of active repos
85
limiter.Wait(ctx)
86
87
-
log.Debugw("SyncGetRepo", "did", did, "since", rev)
88
// TODO: max size on these? A malicious PDS could just send us a petabyte sized repo here and kill us
89
repo, err := atproto.SyncGetRepo(ctx, c, did, rev)
90
if err != nil {
···
125
for i, j := range job.catchup {
126
catchupEventsProcessed.Inc()
127
if err := rf.repoman.HandleExternalUserEvent(ctx, pds.ID, ai.Uid, ai.Did, j.evt.Since, j.evt.Rev, j.evt.Blocks, j.evt.Ops); err != nil {
128
-
log.Errorw("buffered event catchup failed", "error", err, "did", ai.Did, "i", i, "jobCount", len(job.catchup), "seq", j.evt.Seq)
129
resync = true // fall back to a repo sync
130
break
131
}
···
153
span.RecordError(err)
154
155
if ipld.IsNotFound(err) || errors.Is(err, io.EOF) || errors.Is(err, fs.ErrNotExist) {
156
-
log.Errorw("partial repo fetch was missing data", "did", ai.Did, "pds", pds.Host, "rev", rev)
157
repo, err := rf.fetchRepo(ctx, c, &pds, ai.Did, "")
158
if err != nil {
159
return err
···
7
"fmt"
8
"io"
9
"io/fs"
10
+
"log/slog"
11
"sync"
12
13
"github.com/bluesky-social/indigo/api/atproto"
···
28
Limiters: make(map[uint]*rate.Limiter),
29
ApplyPDSClientSettings: func(*xrpc.Client) {},
30
MaxConcurrency: maxConcurrency,
31
+
log: slog.Default().With("system", "indexer"),
32
}
33
}
34
···
42
MaxConcurrency int
43
44
ApplyPDSClientSettings func(*xrpc.Client)
45
+
46
+
log *slog.Logger
47
}
48
49
func (rf *RepoFetcher) GetLimiter(pdsID uint) *rate.Limiter {
···
88
// Wait to prevent DOSing the PDS when connecting to a new stream with lots of active repos
89
limiter.Wait(ctx)
90
91
+
rf.log.Debug("SyncGetRepo", "did", did, "since", rev)
92
// TODO: max size on these? A malicious PDS could just send us a petabyte sized repo here and kill us
93
repo, err := atproto.SyncGetRepo(ctx, c, did, rev)
94
if err != nil {
···
129
for i, j := range job.catchup {
130
catchupEventsProcessed.Inc()
131
if err := rf.repoman.HandleExternalUserEvent(ctx, pds.ID, ai.Uid, ai.Did, j.evt.Since, j.evt.Rev, j.evt.Blocks, j.evt.Ops); err != nil {
132
+
rf.log.Error("buffered event catchup failed", "error", err, "did", ai.Did, "i", i, "jobCount", len(job.catchup), "seq", j.evt.Seq)
133
resync = true // fall back to a repo sync
134
break
135
}
···
157
span.RecordError(err)
158
159
if ipld.IsNotFound(err) || errors.Is(err, io.EOF) || errors.Is(err, fs.ErrNotExist) {
160
+
rf.log.Error("partial repo fetch was missing data", "did", ai.Did, "pds", pds.Host, "rev", rev)
161
repo, err := rf.fetchRepo(ctx, c, &pds, ai.Did, "")
162
if err != nil {
163
return err
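Because every component logger in these hunks is derived from slog.Default(), output format and minimum level can be chosen once at process startup and inherited by all of them. A sketch of such a setup using the standard library's JSON handler; this is assumed configuration for illustration, not code from the repo:

package main

import (
	"log/slog"
	"os"
)

func main() {
	// one handler swap changes format and level for every derived logger
	h := slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelDebug})
	slog.SetDefault(slog.New(h))

	log := slog.Default().With("system", "indexer")
	log.Debug("SyncGetRepo", "did", "did:example:bob", "since", "rev123")
}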
+6
-2
pds/feedgen.go
···
3
import (
4
"context"
5
"fmt"
6
"sort"
7
"strings"
8
"time"
···
22
ix *indexer.Indexer
23
24
readRecord ReadRecordFunc
25
}
26
27
-
func NewFeedGenerator(db *gorm.DB, ix *indexer.Indexer, readRecord ReadRecordFunc) (*FeedGenerator, error) {
28
return &FeedGenerator{
29
db: db,
30
ix: ix,
31
readRecord: readRecord,
32
}, nil
33
}
34
···
355
356
func (fg *FeedGenerator) GetVotes(ctx context.Context, uri string, pcid cid.Cid, limit int, before string) ([]*HydratedVote, error) {
357
if before != "" {
358
-
log.Warn("not respecting 'before' yet")
359
}
360
361
p, err := fg.ix.GetPost(ctx, uri)
···
3
import (
4
"context"
5
"fmt"
6
+
"log/slog"
7
"sort"
8
"strings"
9
"time"
···
23
ix *indexer.Indexer
24
25
readRecord ReadRecordFunc
26
+
27
+
log *slog.Logger
28
}
29
30
+
func NewFeedGenerator(db *gorm.DB, ix *indexer.Indexer, readRecord ReadRecordFunc, log *slog.Logger) (*FeedGenerator, error) {
31
return &FeedGenerator{
32
db: db,
33
ix: ix,
34
readRecord: readRecord,
35
+
log: log,
36
}, nil
37
}
38
···
359
360
func (fg *FeedGenerator) GetVotes(ctx context.Context, uri string, pcid cid.Cid, limit int, before string) ([]*HydratedVote, error) {
361
if before != "" {
362
+
fg.log.Warn("not respecting 'before' yet")
363
}
364
365
p, err := fg.ix.GetPost(ctx, uri)
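The feedgen change differs slightly from the others: instead of building its own logger, NewFeedGenerator now accepts one from the caller. A sketch of that constructor-injection shape with illustrative names; the nil fallback shown is a common variant, not necessarily what the real constructor does:

package main

import "log/slog"

// FeedGen mirrors the shape of the change: the logger is injected by the
// caller rather than looked up from a package global.
type FeedGen struct {
	log *slog.Logger
}

func NewFeedGen(log *slog.Logger) *FeedGen {
	if log == nil {
		log = slog.Default() // fallback for callers that pass nil (assumption)
	}
	return &FeedGen{log: log}
}

func main() {
	fg := NewFeedGen(slog.Default().With("system", "pds"))
	fg.log.Warn("not respecting 'before' yet")
}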
+9
-7
pds/server.go
···
5
"database/sql"
6
"errors"
7
"fmt"
8
"net"
9
"net/http"
10
"net/mail"
···
30
gojwt "github.com/golang-jwt/jwt"
31
"github.com/gorilla/websocket"
32
"github.com/ipfs/go-cid"
33
-
logging "github.com/ipfs/go-log"
34
"github.com/labstack/echo/v4"
35
"github.com/labstack/echo/v4/middleware"
36
"github.com/lestrrat-go/jwx/v2/jwt"
37
"github.com/whyrusleeping/go-did"
38
"gorm.io/gorm"
39
)
40
-
41
-
var log = logging.Logger("pds")
42
43
type Server struct {
44
db *gorm.DB
···
57
serviceUrl string
58
59
plc plc.PLCClient
60
}
61
62
// serverListenerBootTimeout is how long to wait for the requested server socket
···
97
serviceUrl: serviceUrl,
98
jwtSigningKey: jwtkey,
99
enforcePeering: false,
100
}
101
102
repoman.SetEventHandler(func(ctx context.Context, evt *repomgr.RepoEvent) {
103
if err := ix.HandleRepoEvent(ctx, evt); err != nil {
104
-
log.Errorw("handle repo event failed", "user", evt.User, "err", err)
105
}
106
}, true)
107
108
//ix.SendRemoteFollow = s.sendRemoteFollow
109
ix.CreateExternalUser = s.createExternalUser
110
111
-
feedgen, err := NewFeedGenerator(db, ix, s.readRecordFunc)
112
if err != nil {
113
return nil, err
114
}
···
434
435
func (s *Server) HandleHealthCheck(c echo.Context) error {
436
if err := s.db.Exec("SELECT 1").Error; err != nil {
437
-
log.Errorf("healthcheck can't connect to database: %v", err)
438
return c.JSON(500, HealthStatus{Status: "error", Message: "can't connect to database"})
439
} else {
440
return c.JSON(200, HealthStatus{Status: "ok"})
···
726
func (s *Server) UpdateUserHandle(ctx context.Context, u *User, handle string) error {
727
if u.Handle == handle {
728
// no change? move on
729
-
log.Warnw("attempted to change handle to current handle", "did", u.Did, "handle", handle)
730
return nil
731
}
732
···
5
"database/sql"
6
"errors"
7
"fmt"
8
+
"log/slog"
9
"net"
10
"net/http"
11
"net/mail"
···
31
gojwt "github.com/golang-jwt/jwt"
32
"github.com/gorilla/websocket"
33
"github.com/ipfs/go-cid"
34
"github.com/labstack/echo/v4"
35
"github.com/labstack/echo/v4/middleware"
36
"github.com/lestrrat-go/jwx/v2/jwt"
37
"github.com/whyrusleeping/go-did"
38
"gorm.io/gorm"
39
)
40
41
type Server struct {
42
db *gorm.DB
···
55
serviceUrl string
56
57
plc plc.PLCClient
58
+
59
+
log *slog.Logger
60
}
61
62
// serverListenerBootTimeout is how long to wait for the requested server socket
···
97
serviceUrl: serviceUrl,
98
jwtSigningKey: jwtkey,
99
enforcePeering: false,
100
+
101
+
log: slog.Default().With("system", "pds"),
102
}
103
104
repoman.SetEventHandler(func(ctx context.Context, evt *repomgr.RepoEvent) {
105
if err := ix.HandleRepoEvent(ctx, evt); err != nil {
106
+
s.log.Error("handle repo event failed", "user", evt.User, "err", err)
107
}
108
}, true)
109
110
//ix.SendRemoteFollow = s.sendRemoteFollow
111
ix.CreateExternalUser = s.createExternalUser
112
113
+
feedgen, err := NewFeedGenerator(db, ix, s.readRecordFunc, s.log)
114
if err != nil {
115
return nil, err
116
}
···
436
437
func (s *Server) HandleHealthCheck(c echo.Context) error {
438
if err := s.db.Exec("SELECT 1").Error; err != nil {
439
+
s.log.Error("healthcheck can't connect to database", "err", err)
440
return c.JSON(500, HealthStatus{Status: "error", Message: "can't connect to database"})
441
} else {
442
return c.JSON(200, HealthStatus{Status: "ok"})
···
728
func (s *Server) UpdateUserHandle(ctx context.Context, u *User, handle string) error {
729
if u.Handle == handle {
730
// no change? move on
731
+
s.log.Warn("attempted to change handle to current handle", "did", u.Did, "handle", handle)
732
return nil
733
}
734
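The pds/server.go hunks also show the message-shape change: printf-formatted errors become a constant message plus an "err" attribute, which keeps messages easy to filter and aggregate while the error travels as structured data. A small sketch of the two styles side by side (slog only; the old go-log call is shown as a comment):

package main

import (
	"errors"
	"log/slog"
	"os"
)

func main() {
	log := slog.New(slog.NewTextHandler(os.Stderr, nil)).With("system", "pds")
	err := errors.New("connection refused")
	// old style (go-log): log.Errorf("healthcheck can't connect to database: %v", err)
	// new style (slog): constant message, error as an attribute
	log.Error("healthcheck can't connect to database", "err", err)
}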
+13
-12
repomgr/repomgr.go
···
6
"errors"
7
"fmt"
8
"io"
9
"strings"
10
"sync"
11
"time"
···
24
"github.com/ipfs/go-datastore"
25
blockstore "github.com/ipfs/go-ipfs-blockstore"
26
ipld "github.com/ipfs/go-ipld-format"
27
-
logging "github.com/ipfs/go-log/v2"
28
"github.com/ipld/go-car"
29
cbg "github.com/whyrusleeping/cbor-gen"
30
"go.opentelemetry.io/otel"
31
"go.opentelemetry.io/otel/attribute"
32
"gorm.io/gorm"
33
)
34
-
35
-
var log = logging.Logger("repomgr")
36
37
func NewRepoManager(cs carstore.CarStore, kmgr KeyManager) *RepoManager {
38
···
40
cs: cs,
41
userLocks: make(map[models.Uid]*userLock),
42
kmgr: kmgr,
43
}
44
}
45
···
62
63
events func(context.Context, *RepoEvent)
64
hydrateRecords bool
65
}
66
67
type ActorInfo struct {
···
534
535
span.SetAttributes(attribute.Int64("uid", int64(uid)))
536
537
-
log.Debugw("HandleExternalUserEvent", "pds", pdsid, "uid", uid, "since", since, "nrev", nrev)
538
539
unlock := rm.lockUser(ctx, uid)
540
defer unlock()
···
835
ops := make([]RepoOp, 0, len(diffops))
836
for _, op := range diffops {
837
repoOpsImported.Inc()
838
-
out, err := processOp(ctx, bs, op, rm.hydrateRecords)
839
if err != nil {
840
-
log.Errorw("failed to process repo op", "err", err, "path", op.Rpath, "repo", repoDid)
841
}
842
843
if out != nil {
···
871
return nil
872
}
873
874
-
func processOp(ctx context.Context, bs blockstore.Blockstore, op *mst.DiffOp, hydrateRecords bool) (*RepoOp, error) {
875
parts := strings.SplitN(op.Rpath, "/", 2)
876
if len(parts) != 2 {
877
return nil, fmt.Errorf("repo mst had invalid rpath: %q", op.Rpath)
···
904
return nil, err
905
}
906
907
-
log.Warnf("failed processing repo diff: %s", err)
908
} else {
909
outop.Record = rec
910
}
···
960
// the repos lifecycle, this will end up erroneously not including
961
// them. We should compute the set of blocks needed to read any repo
962
// ops that happened in the commit and use that for our 'output' blocks
963
-
cids, err := walkTree(ctx, seen, root, membs, true)
964
if err != nil {
965
return fmt.Errorf("walkTree: %w", err)
966
}
···
1001
1002
// walkTree returns all cids linked recursively by the root, skipping any cids
1003
// in the 'skip' map, and not erroring on 'not found' if prevMissing is set
1004
-
func walkTree(ctx context.Context, skip map[cid.Cid]bool, root cid.Cid, bs blockstore.Blockstore, prevMissing bool) ([]cid.Cid, error) {
1005
// TODO: what if someone puts non-cbor links in their repo?
1006
if root.Prefix().Codec != cid.DagCBOR {
1007
return nil, fmt.Errorf("can only handle dag-cbor objects in repos (%s is %d)", root, root.Prefix().Codec)
···
1015
var links []cid.Cid
1016
if err := cbg.ScanForLinks(bytes.NewReader(blk.RawData()), func(c cid.Cid) {
1017
if c.Prefix().Codec == cid.Raw {
1018
-
log.Debugw("skipping 'raw' CID in record", "recordCid", root, "rawCid", c)
1019
return
1020
}
1021
if skip[c] {
···
1035
1036
// TODO: should do this non-recursive since i expect these may get deep
1037
for _, c := range links {
1038
-
sub, err := walkTree(ctx, skip, c, bs, prevMissing)
1039
if err != nil {
1040
if prevMissing && !ipld.IsNotFound(err) {
1041
return nil, err
···
6
"errors"
7
"fmt"
8
"io"
9
+
"log/slog"
10
"strings"
11
"sync"
12
"time"
···
25
"github.com/ipfs/go-datastore"
26
blockstore "github.com/ipfs/go-ipfs-blockstore"
27
ipld "github.com/ipfs/go-ipld-format"
28
"github.com/ipld/go-car"
29
cbg "github.com/whyrusleeping/cbor-gen"
30
"go.opentelemetry.io/otel"
31
"go.opentelemetry.io/otel/attribute"
32
"gorm.io/gorm"
33
)
34
35
func NewRepoManager(cs carstore.CarStore, kmgr KeyManager) *RepoManager {
36
···
38
cs: cs,
39
userLocks: make(map[models.Uid]*userLock),
40
kmgr: kmgr,
41
+
log: slog.Default().With("system", "repomgr"),
42
}
43
}
44
···
61
62
events func(context.Context, *RepoEvent)
63
hydrateRecords bool
64
+
65
+
log *slog.Logger
66
}
67
68
type ActorInfo struct {
···
535
536
span.SetAttributes(attribute.Int64("uid", int64(uid)))
537
538
+
rm.log.Debug("HandleExternalUserEvent", "pds", pdsid, "uid", uid, "since", since, "nrev", nrev)
539
540
unlock := rm.lockUser(ctx, uid)
541
defer unlock()
···
836
ops := make([]RepoOp, 0, len(diffops))
837
for _, op := range diffops {
838
repoOpsImported.Inc()
839
+
out, err := rm.processOp(ctx, bs, op, rm.hydrateRecords)
840
if err != nil {
841
+
rm.log.Error("failed to process repo op", "err", err, "path", op.Rpath, "repo", repoDid)
842
}
843
844
if out != nil {
···
872
return nil
873
}
874
875
+
func (rm *RepoManager) processOp(ctx context.Context, bs blockstore.Blockstore, op *mst.DiffOp, hydrateRecords bool) (*RepoOp, error) {
876
parts := strings.SplitN(op.Rpath, "/", 2)
877
if len(parts) != 2 {
878
return nil, fmt.Errorf("repo mst had invalid rpath: %q", op.Rpath)
···
905
return nil, err
906
}
907
908
+
rm.log.Warn("failed processing repo diff", "err", err)
909
} else {
910
outop.Record = rec
911
}
···
961
// the repos lifecycle, this will end up erroneously not including
962
// them. We should compute the set of blocks needed to read any repo
963
// ops that happened in the commit and use that for our 'output' blocks
964
+
cids, err := rm.walkTree(ctx, seen, root, membs, true)
965
if err != nil {
966
return fmt.Errorf("walkTree: %w", err)
967
}
···
1002
1003
// walkTree returns all cids linked recursively by the root, skipping any cids
1004
// in the 'skip' map, and not erroring on 'not found' if prevMissing is set
1005
+
func (rm *RepoManager) walkTree(ctx context.Context, skip map[cid.Cid]bool, root cid.Cid, bs blockstore.Blockstore, prevMissing bool) ([]cid.Cid, error) {
1006
// TODO: what if someone puts non-cbor links in their repo?
1007
if root.Prefix().Codec != cid.DagCBOR {
1008
return nil, fmt.Errorf("can only handle dag-cbor objects in repos (%s is %d)", root, root.Prefix().Codec)
···
1016
var links []cid.Cid
1017
if err := cbg.ScanForLinks(bytes.NewReader(blk.RawData()), func(c cid.Cid) {
1018
if c.Prefix().Codec == cid.Raw {
1019
+
rm.log.Debug("skipping 'raw' CID in record", "recordCid", root, "rawCid", c)
1020
return
1021
}
1022
if skip[c] {
···
1036
1037
// TODO: should do this non-recursive since i expect these may get deep
1038
for _, c := range links {
1039
+
sub, err := rm.walkTree(ctx, skip, c, bs, prevMissing)
1040
if err != nil {
1041
if prevMissing && !ipld.IsNotFound(err) {
1042
return nil, err
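The repomgr.go hunk also turns the package-level helpers processOp and walkTree into methods on RepoManager, which is what lets them log through rm.log once the package-level logger is gone. A small sketch of that refactor shape, using an illustrative type rather than the real RepoManager:

package main

import (
	"fmt"
	"log/slog"
)

// RepoWalker is an illustrative stand-in for a type that owns its logger.
type RepoWalker struct {
	log *slog.Logger
}

// Making the former package-level helper a method gives it access to the
// receiver's logger instead of a package global.
func (rw *RepoWalker) walk(depth int) error {
	if depth > 3 {
		return fmt.Errorf("too deep")
	}
	rw.log.Debug("visiting node", "depth", depth)
	return rw.walk(depth + 1)
}

func main() {
	rw := &RepoWalker{log: slog.Default().With("system", "repomgr")}
	if err := rw.walk(0); err != nil {
		rw.log.Error("walk failed", "err", err)
	}
}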
+1
search/firehose.go
+25
-21
splitter/splitter.go
···
5
"errors"
6
"fmt"
7
"io"
8
"math/rand"
9
"net"
10
"net/http"
···
18
events "github.com/bluesky-social/indigo/events"
19
"github.com/bluesky-social/indigo/events/schedulers/sequential"
20
"github.com/gorilla/websocket"
21
-
logging "github.com/ipfs/go-log"
22
"github.com/labstack/echo/v4"
23
"github.com/labstack/echo/v4/middleware"
24
promclient "github.com/prometheus/client_golang/prometheus"
···
26
dto "github.com/prometheus/client_model/go"
27
)
28
29
-
var log = logging.Logger("splitter")
30
-
31
type Splitter struct {
32
erb *EventRingBuffer
33
pp *events.PebblePersist
···
39
consumers map[uint64]*SocketConsumer
40
41
conf SplitterConfig
42
}
43
44
type SplitterConfig struct {
···
61
erb: erb,
62
events: em,
63
consumers: make(map[uint64]*SocketConsumer),
64
}
65
}
66
func NewSplitter(conf SplitterConfig) (*Splitter, error) {
···
74
erb: erb,
75
events: em,
76
consumers: make(map[uint64]*SocketConsumer),
77
}, nil
78
} else {
79
pp, err := events.NewPebblePersistance(conf.PebbleOptions)
···
88
pp: pp,
89
events: em,
90
consumers: make(map[uint64]*SocketConsumer),
91
}, nil
92
}
93
}
···
115
pp: pp,
116
events: em,
117
consumers: make(map[uint64]*SocketConsumer),
118
}, nil
119
}
120
···
173
if err2 := ctx.JSON(err.Code, map[string]any{
174
"error": err.Message,
175
}); err2 != nil {
176
-
log.Errorf("Failed to write http error: %s", err2)
177
}
178
default:
179
sendHeader := true
···
181
sendHeader = false
182
}
183
184
-
log.Warnf("HANDLER ERROR: (%s) %s", ctx.Path(), err)
185
186
if strings.HasPrefix(ctx.Path(), "/admin/") {
187
ctx.JSON(500, map[string]any{
···
275
}
276
277
if err := conn.WriteControl(websocket.PingMessage, []byte{}, time.Now().Add(5*time.Second)); err != nil {
278
-
log.Errorf("failed to ping client: %s", err)
279
cancel()
280
return
281
}
···
300
for {
301
_, _, err := conn.ReadMessage()
302
if err != nil {
303
-
log.Errorf("failed to read message from client: %s", err)
304
cancel()
305
return
306
}
···
327
consumerID := s.registerConsumer(&consumer)
328
defer s.cleanupConsumer(consumerID)
329
330
-
log.Infow("new consumer",
331
"remote_addr", consumer.RemoteAddr,
332
"user_agent", consumer.UserAgent,
333
"cursor", since,
···
340
select {
341
case evt, ok := <-evts:
342
if !ok {
343
-
log.Error("event stream closed unexpectedly")
344
return nil
345
}
346
347
wc, err := conn.NextWriter(websocket.BinaryMessage)
348
if err != nil {
349
-
log.Errorf("failed to get next writer: %s", err)
350
return err
351
}
352
···
360
}
361
362
if err := wc.Close(); err != nil {
363
-
log.Warnf("failed to flush-close our event write: %s", err)
364
return nil
365
}
366
···
401
402
var m = &dto.Metric{}
403
if err := c.EventsSent.Write(m); err != nil {
404
-
log.Errorf("failed to get sent counter: %s", err)
405
}
406
407
-
log.Infow("consumer disconnected",
408
"consumer_id", id,
409
"remote_addr", c.RemoteAddr,
410
"user_agent", c.UserAgent,
···
450
}
451
con, res, err := d.DialContext(ctx, url, header)
452
if err != nil {
453
-
log.Warnw("dialing failed", "host", host, "err", err, "backoff", backoff)
454
time.Sleep(sleepForBackoff(backoff))
455
backoff++
456
457
continue
458
}
459
460
-
log.Info("event subscription response code: ", res.StatusCode)
461
462
if err := s.handleConnection(ctx, host, con, &cursor); err != nil {
463
-
log.Warnf("connection to %q failed: %s", host, err)
464
}
465
}
466
}
···
483
if seq%5000 == 0 {
484
// TODO: don't need this after we move to getting seq from pebble
485
if err := s.writeCursor(seq); err != nil {
486
-
log.Errorf("write cursor failed: %s", err)
487
}
488
}
489
···
491
return nil
492
})
493
494
-
return events.HandleRepoStream(ctx, con, sched)
495
}
496
497
func (s *Splitter) getLastCursor() (int64, error) {
498
if s.pp != nil {
499
seq, millis, _, err := s.pp.GetLast(context.Background())
500
if err == nil {
501
-
log.Debugw("got last cursor from pebble", "seq", seq, "millis", millis)
502
return seq, nil
503
} else if errors.Is(err, events.ErrNoLast) {
504
-
log.Info("pebble no last")
505
} else {
506
-
log.Errorw("pebble seq fail", "err", err)
507
}
508
}
509
···
5
"errors"
6
"fmt"
7
"io"
8
+
"log/slog"
9
"math/rand"
10
"net"
11
"net/http"
···
19
events "github.com/bluesky-social/indigo/events"
20
"github.com/bluesky-social/indigo/events/schedulers/sequential"
21
"github.com/gorilla/websocket"
22
"github.com/labstack/echo/v4"
23
"github.com/labstack/echo/v4/middleware"
24
promclient "github.com/prometheus/client_golang/prometheus"
···
26
dto "github.com/prometheus/client_model/go"
27
)
28
29
type Splitter struct {
30
erb *EventRingBuffer
31
pp *events.PebblePersist
···
37
consumers map[uint64]*SocketConsumer
38
39
conf SplitterConfig
40
+
41
+
log *slog.Logger
42
}
43
44
type SplitterConfig struct {
···
61
erb: erb,
62
events: em,
63
consumers: make(map[uint64]*SocketConsumer),
64
+
log: slog.Default().With("system", "splitter"),
65
}
66
}
67
func NewSplitter(conf SplitterConfig) (*Splitter, error) {
···
75
erb: erb,
76
events: em,
77
consumers: make(map[uint64]*SocketConsumer),
78
+
log: slog.Default().With("system", "splitter"),
79
}, nil
80
} else {
81
pp, err := events.NewPebblePersistance(conf.PebbleOptions)
···
90
pp: pp,
91
events: em,
92
consumers: make(map[uint64]*SocketConsumer),
93
+
log: slog.Default().With("system", "splitter"),
94
}, nil
95
}
96
}
···
118
pp: pp,
119
events: em,
120
consumers: make(map[uint64]*SocketConsumer),
121
+
log: slog.Default().With("system", "splitter"),
122
}, nil
123
}
124
···
177
if err2 := ctx.JSON(err.Code, map[string]any{
178
"error": err.Message,
179
}); err2 != nil {
180
+
s.log.Error("Failed to write http error", "err", err2)
181
}
182
default:
183
sendHeader := true
···
185
sendHeader = false
186
}
187
188
+
s.log.Warn("HANDLER ERROR", "path", ctx.Path(), "err", err)
189
190
if strings.HasPrefix(ctx.Path(), "/admin/") {
191
ctx.JSON(500, map[string]any{
···
279
}
280
281
if err := conn.WriteControl(websocket.PingMessage, []byte{}, time.Now().Add(5*time.Second)); err != nil {
282
+
s.log.Error("failed to ping client", "err", err)
283
cancel()
284
return
285
}
···
304
for {
305
_, _, err := conn.ReadMessage()
306
if err != nil {
307
+
s.log.Error("failed to read message from client", "err", err)
308
cancel()
309
return
310
}
···
331
consumerID := s.registerConsumer(&consumer)
332
defer s.cleanupConsumer(consumerID)
333
334
+
s.log.Info("new consumer",
335
"remote_addr", consumer.RemoteAddr,
336
"user_agent", consumer.UserAgent,
337
"cursor", since,
···
344
select {
345
case evt, ok := <-evts:
346
if !ok {
347
+
s.log.Error("event stream closed unexpectedly")
348
return nil
349
}
350
351
wc, err := conn.NextWriter(websocket.BinaryMessage)
352
if err != nil {
353
+
s.log.Error("failed to get next writer", "err", err)
354
return err
355
}
356
···
364
}
365
366
if err := wc.Close(); err != nil {
367
+
s.log.Warn("failed to flush-close our event write", "err", err)
368
return nil
369
}
370
···
405
406
var m = &dto.Metric{}
407
if err := c.EventsSent.Write(m); err != nil {
408
+
s.log.Error("failed to get sent counter", "err", err)
409
}
410
411
+
s.log.Info("consumer disconnected",
412
"consumer_id", id,
413
"remote_addr", c.RemoteAddr,
414
"user_agent", c.UserAgent,
···
454
}
455
con, res, err := d.DialContext(ctx, url, header)
456
if err != nil {
457
+
s.log.Warn("dialing failed", "host", host, "err", err, "backoff", backoff)
458
time.Sleep(sleepForBackoff(backoff))
459
backoff++
460
461
continue
462
}
463
464
+
s.log.Info("event subscription response", "code", res.StatusCode)
465
466
if err := s.handleConnection(ctx, host, con, &cursor); err != nil {
467
+
s.log.Warn("connection failed", "host", host, "err", err)
468
}
469
}
470
}
···
487
if seq%5000 == 0 {
488
// TODO: don't need this after we move to getting seq from pebble
489
if err := s.writeCursor(seq); err != nil {
490
+
s.log.Error("write cursor failed", "err", err)
491
}
492
}
493
···
495
return nil
496
})
497
498
+
return events.HandleRepoStream(ctx, con, sched, nil)
499
}
500
501
func (s *Splitter) getLastCursor() (int64, error) {
502
if s.pp != nil {
503
seq, millis, _, err := s.pp.GetLast(context.Background())
504
if err == nil {
505
+
s.log.Debug("got last cursor from pebble", "seq", seq, "millis", millis)
506
return seq, nil
507
} else if errors.Is(err, events.ErrNoLast) {
508
+
s.log.Info("pebble no last")
509
} else {
510
+
s.log.Error("pebble seq fail", "err", err)
511
}
512
}
513
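The last splitter hunk passes an additional final argument (nil) to events.HandleRepoStream; given the rest of this change, that argument is presumably the logger to use, with nil meaning "fall back to a default". A hedged sketch of that calling convention, using a stand-in function rather than the real events API:

package main

import (
	"context"
	"log/slog"
)

// handleStream stands in for the new shape of events.HandleRepoStream: the
// caller supplies the logger explicitly. Hypothetical signature for
// illustration; see the diff above for the real call sites.
func handleStream(ctx context.Context, log *slog.Logger) error {
	if log == nil {
		log = slog.Default() // assumed fallback when the caller passes nil
	}
	log.Info("consuming repo stream")
	return nil
}

func main() {
	_ = handleStream(context.Background(), slog.Default().With("system", "splitter"))
}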
-5
testing/integ_test.go
···
15
"github.com/bluesky-social/indigo/repo"
16
"github.com/bluesky-social/indigo/xrpc"
17
"github.com/ipfs/go-cid"
18
-
"github.com/ipfs/go-log/v2"
19
car "github.com/ipld/go-car"
20
"github.com/stretchr/testify/assert"
21
)
22
-
23
-
func init() {
24
-
log.SetAllLoggers(log.LevelInfo)
25
-
}
26
27
func TestRelayBasic(t *testing.T) {
28
if testing.Short() {
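With the go-log init() removed from the integration tests, package-wide level control like SetAllLoggers is gone; if a test run needs a specific verbosity, one option is to replace the default slog handler before the tests execute. This is a sketch of that idea, an assumption about how one might configure it rather than code from the test suite:

package main // in a real test this would live in a _test.go file, e.g. inside TestMain

import (
	"log/slog"
	"os"
)

func main() {
	// choose the level once; all loggers derived from slog.Default() inherit it
	h := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo})
	slog.SetDefault(slog.New(h))
	slog.Info("test logging configured", "level", "info")
}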
+1
-1
testing/utils.go