fork of indigo with slightly nicer lexgen

Refactor schedulers into subpackage
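
The consumer-pool implementations move out of `events` into dedicated `events/schedulers/{sequential,parallel,autoscaling}` packages, each exposing a `NewScheduler` constructor whose result is handed to `events.HandleRepoStream`. A minimal consumer against the new layout might look like the sketch below; the firehose URL and the `RepoCommit` callback body are illustrative and not taken from this change, while the scheduler arguments mirror the bgs/fedmgr.go call site further down.

```go
package main

import (
	"context"
	"log"
	"time"

	comatproto "github.com/bluesky-social/indigo/api/atproto"
	"github.com/bluesky-social/indigo/events"
	"github.com/bluesky-social/indigo/events/schedulers/autoscaling"
	"github.com/gorilla/websocket"
)

func main() {
	ctx := context.Background()

	// Illustrative endpoint; any com.atproto.sync.subscribeRepos stream works.
	con, _, err := websocket.DefaultDialer.Dial("wss://bsky.network/xrpc/com.atproto.sync.subscribeRepos", nil)
	if err != nil {
		log.Fatal(err)
	}

	rsc := &events.RepoStreamCallbacks{
		RepoCommit: func(evt *comatproto.SyncSubscribeRepos_Commit) error {
			log.Printf("commit: repo=%s seq=%d", evt.Repo, evt.Seq)
			return nil
		},
	}

	// Start with 1 worker, cap at 360, re-evaluate throughput every second
	// (the same parameters used in bgs/fedmgr.go below).
	sched := autoscaling.NewScheduler(1, 360, time.Second, con.RemoteAddr().String(), rsc.EventHandler)
	if err := events.HandleRepoStream(ctx, con, sched); err != nil {
		log.Fatal(err)
	}
}
```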

Changed files (+245 -180): bgs, cmd, events, search, testing
bgs/fedmgr.go  (+2 -2)

···
 	comatproto "github.com/bluesky-social/indigo/api/atproto"
 	"github.com/bluesky-social/indigo/events"
-	"github.com/bluesky-social/indigo/events/autoscaling"
+	"github.com/bluesky-social/indigo/events/schedulers/autoscaling"
 	"github.com/bluesky-social/indigo/models"
 	"go.opentelemetry.io/otel"
···
 		},
 	}

-	pool := autoscaling.NewConsumerPool(1, 360, con.RemoteAddr().String(), rsc.EventHandler)
+	pool := autoscaling.NewScheduler(1, 360, time.Second, con.RemoteAddr().String(), rsc.EventHandler)
 	return events.HandleRepoStream(ctx, con, pool)
 }
cmd/gosky/debug.go  (+7 -3)

···
 	"github.com/bluesky-social/indigo/api/bsky"
 	"github.com/bluesky-social/indigo/did"
 	"github.com/bluesky-social/indigo/events"
+	"github.com/bluesky-social/indigo/events/schedulers/sequential"
 	lexutil "github.com/bluesky-social/indigo/lex/util"
 	"github.com/bluesky-social/indigo/repo"
 	"github.com/bluesky-social/indigo/repomgr"
···
 		},
 	}

-	err = events.HandleRepoStream(ctx, con, &events.SequentialScheduler{rsc.EventHandler})
+	seqScheduler := sequential.NewScheduler("debug-inspect-event", rsc.EventHandler)
+	err = events.HandleRepoStream(ctx, con, seqScheduler)
 	if err != errFoundIt {
 		return err
 	}
···
 			return fmt.Errorf("%s: %s", evt.Error, evt.Message)
 		},
 	}
-	err = events.HandleRepoStream(ctx, con, &events.SequentialScheduler{rsc.EventHandler})
+	seqScheduler := sequential.NewScheduler("debug-stream", rsc.EventHandler)
+	err = events.HandleRepoStream(ctx, con, seqScheduler)
 	if err != nil {
 		return err
 	}
···
 				return fmt.Errorf("%s: %s", evt.Error, evt.Message)
 			},
 		}
-		if err := events.HandleRepoStream(ctx, con, &events.SequentialScheduler{rsc.EventHandler}); err != nil {
+		seqScheduler := sequential.NewScheduler(fmt.Sprintf("debug-stream-%d", i+1), rsc.EventHandler)
+		if err := events.HandleRepoStream(ctx, con, seqScheduler); err != nil {
 			log.Fatalf("HandleRepoStream failure on url%d: %s", i+1, err)
 		}
 	}(i, url)
cmd/gosky/main.go  (+3 -1)

···
 	"github.com/bluesky-social/indigo/api/bsky"
 	appbsky "github.com/bluesky-social/indigo/api/bsky"
 	"github.com/bluesky-social/indigo/events"
+	"github.com/bluesky-social/indigo/events/schedulers/sequential"
 	lexutil "github.com/bluesky-social/indigo/lex/util"
 	"github.com/bluesky-social/indigo/repo"
 	"github.com/bluesky-social/indigo/util"
···
 			return fmt.Errorf("error frame: %s: %s", errf.Error, errf.Message)
 		},
 	}
-	return events.HandleRepoStream(ctx, con, &events.SequentialScheduler{rsc.EventHandler})
+	seqScheduler := sequential.NewScheduler(con.RemoteAddr().String(), rsc.EventHandler)
+	return events.HandleRepoStream(ctx, con, seqScheduler)
 	},
 }
cmd/gosky/streamdiff.go  (+7 -3)

···
 	comatproto "github.com/bluesky-social/indigo/api/atproto"
 	"github.com/bluesky-social/indigo/events"
+	"github.com/bluesky-social/indigo/events/schedulers/sequential"
 	"github.com/gorilla/websocket"
 	cli "github.com/urfave/cli/v2"
 )
···
 			return fmt.Errorf("%s: %s", evt.Error, evt.Message)
 		},
 	}
-	err = events.HandleRepoStream(ctx, cona, &events.SequentialScheduler{rsc.EventHandler})
+	seqScheduler := sequential.NewScheduler("streamA", rsc.EventHandler)
+	err = events.HandleRepoStream(ctx, cona, seqScheduler)
 	if err != nil {
 		log.Errorf("stream A failed: %s", err)
 	}
···
 			return fmt.Errorf("%s: %s", evt.Error, evt.Message)
 		},
 	}
-	err = events.HandleRepoStream(ctx, conb, &events.SequentialScheduler{rsc.EventHandler})
+
+	seqScheduler := sequential.NewScheduler("streamB", rsc.EventHandler)
+	err = events.HandleRepoStream(ctx, conb, seqScheduler)
 	if err != nil {
-		log.Errorf("stream A failed: %s", err)
+		log.Errorf("stream B failed: %s", err)
 	}
 }()
cmd/sonar/main.go  (+2 -1)

···
 	"time"

 	"github.com/bluesky-social/indigo/events"
+	"github.com/bluesky-social/indigo/events/schedulers/autoscaling"
 	"github.com/bluesky-social/indigo/sonar"
 	"github.com/bluesky-social/indigo/util/version"
 	"github.com/gorilla/websocket"
···
 	wg := sync.WaitGroup{}

-	pool := events.NewConsumerPool(cctx.Int("worker-count"), cctx.Int("max-queue-size"), u.Host, s.HandleStreamEvent)
+	pool := autoscaling.NewScheduler(cctx.Int("worker-count"), cctx.Int("max-queue-size"), time.Second, u.Host, s.HandleStreamEvent)

 	// Start a goroutine to manage the cursor file, saving the current cursor every 5 seconds.
 	go func() {
events/autoscaling/autoscaling.go → events/schedulers/autoscaling/autoscaling.go  (+20 -16)

···
 	"time"

 	"github.com/bluesky-social/indigo/events"
+	"github.com/bluesky-social/indigo/events/schedulers"
 	"github.com/labstack/gommon/log"
 	"github.com/prometheus/client_golang/prometheus"
 )

-type ConsumerPool struct {
+// Scheduler is a scheduler that will scale up and down the number of workers based on the throughput of the workers.
+type Scheduler struct {
 	concurrency    int
 	maxConcurrency int
···
 	itemsAdded     prometheus.Counter
 	itemsProcessed prometheus.Counter
 	itemsActive    prometheus.Counter
-	workersAcrive  prometheus.Gauge
+	workersActive  prometheus.Gauge

 	// autoscaling
-	throughputManager *ThroughputManager
+	throughputManager  *ThroughputManager
+	autoscaleFrequency time.Duration
 }

-func NewConsumerPool(concurrency, maxC int, ident string, do func(context.Context, *events.XRPCStreamEvent) error) *ConsumerPool {
-	p := &ConsumerPool{
+func NewScheduler(concurrency, maxC int, autoscaleFrequency time.Duration, ident string, do func(context.Context, *events.XRPCStreamEvent) error) *Scheduler {
+	p := &Scheduler{
 		concurrency:    concurrency,
 		maxConcurrency: maxC,
···
 		ident: ident,

-		itemsAdded:     workItemsAdded.WithLabelValues(ident, "autoscaling"),
-		itemsProcessed: workItemsProcessed.WithLabelValues(ident, "autoscaling"),
-		itemsActive:    workItemsActive.WithLabelValues(ident, "autoscaling"),
-		workersAcrive:  workersActive.WithLabelValues(ident, "autoscaling"),
+		itemsAdded:     schedulers.WorkItemsAdded.WithLabelValues(ident, "autoscaling"),
+		itemsProcessed: schedulers.WorkItemsProcessed.WithLabelValues(ident, "autoscaling"),
+		itemsActive:    schedulers.WorkItemsActive.WithLabelValues(ident, "autoscaling"),
+		workersActive:  schedulers.WorkersActive.WithLabelValues(ident, "autoscaling"),

 		// autoscaling
 		// By default, the ThroughputManager will calculate the average throughput over the last 60 seconds.
-		throughputManager: NewThroughputManager(60),
+		throughputManager:  NewThroughputManager(60),
+		autoscaleFrequency: autoscaleFrequency,
 	}

 	for i := 0; i < concurrency; i++ {
···
 }

 // Add autoscaling function
-func (p *ConsumerPool) autoscale() {
+func (p *Scheduler) autoscale() {
 	p.throughputManager.Start()
-	tick := time.NewTicker(time.Second * 5) // adjust as needed
+	tick := time.NewTicker(p.autoscaleFrequency)
 	for range tick.C {
 		avg := p.throughputManager.AvgThroughput()
 		if avg > float64(p.concurrency) && p.concurrency < p.maxConcurrency {
···
 	signal string
 }

-func (p *ConsumerPool) AddWork(ctx context.Context, repo string, val *events.XRPCStreamEvent) error {
+func (p *Scheduler) AddWork(ctx context.Context, repo string, val *events.XRPCStreamEvent) error {
 	p.itemsAdded.Inc()
 	p.throughputManager.Add(1)
 	t := &consumerTask{
···
 	}
 }

-func (p *ConsumerPool) worker() {
+func (p *Scheduler) worker() {
 	log.Infof("starting autoscaling worker for %s", p.ident)
-	p.workersAcrive.Inc()
+	p.workersActive.Inc()
 	for work := range p.feeder {
 		for work != nil {
 			// Check if the work item contains a signal to stop the worker.
 			if work.signal == "stop" {
 				log.Infof("stopping autoscaling worker for %s", p.ident)
-				p.workersAcrive.Dec()
+				p.workersActive.Dec()
 				return
 			}
events/autoscaling/metrics.go  (-26, deleted)

-package autoscaling
-
-import (
-	"github.com/prometheus/client_golang/prometheus"
-	"github.com/prometheus/client_golang/prometheus/promauto"
-)
-
-var workItemsAdded = promauto.NewCounterVec(prometheus.CounterOpts{
-	Name: "indigo_pool_work_items_added_total",
-	Help: "Total number of work items added to the consumer pool",
-}, []string{"pool", "pool_type"})
-
-var workItemsProcessed = promauto.NewCounterVec(prometheus.CounterOpts{
-	Name: "indigo_pool_work_items_processed_total",
-	Help: "Total number of work items processed by the consumer pool",
-}, []string{"pool", "pool_type"})
-
-var workItemsActive = promauto.NewCounterVec(prometheus.CounterOpts{
-	Name: "indigo_pool_work_items_active_total",
-	Help: "Total number of work items passed into a worker",
-}, []string{"pool", "pool_type"})
-
-var workersActive = promauto.NewGaugeVec(prometheus.GaugeOpts{
-	Name: "indigo_pool_workers_active",
-	Help: "Number of workers currently active",
-}, []string{"pool", "pool_type"})
events/autoscaling/throughput.go → events/schedulers/autoscaling/throughput.go  (renamed, no content changes)
events/events.go  (+4)

···
 var log = logging.Logger("events")

+type Scheduler interface {
+	AddWork(ctx context.Context, repo string, val *XRPCStreamEvent) error
+}
+
 type EventManager struct {
 	subs   []*Subscriber
 	subsLk sync.Mutex
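
With `Scheduler` defined as an interface in `events`, `HandleRepoStream` no longer cares which concrete pool drives the handler; anything with a matching `AddWork` method can be plugged in. A hedged sketch of a custom implementation is below. The `countingScheduler` name and behavior are hypothetical, shown only to illustrate the contract; it reuses the exported metric vectors added in events/schedulers/metrics.go further down.

```go
package custom

import (
	"context"

	"github.com/bluesky-social/indigo/events"
	"github.com/bluesky-social/indigo/events/schedulers"
	"github.com/prometheus/client_golang/prometheus"
)

// countingScheduler is a hypothetical events.Scheduler that runs the handler
// inline and records the shared scheduler metrics under its own label.
type countingScheduler struct {
	do func(context.Context, *events.XRPCStreamEvent) error

	itemsAdded     prometheus.Counter
	itemsProcessed prometheus.Counter
}

func NewCountingScheduler(ident string, do func(context.Context, *events.XRPCStreamEvent) error) events.Scheduler {
	return &countingScheduler{
		do:             do,
		itemsAdded:     schedulers.WorkItemsAdded.WithLabelValues(ident, "counting"),
		itemsProcessed: schedulers.WorkItemsProcessed.WithLabelValues(ident, "counting"),
	}
}

// AddWork satisfies events.Scheduler; the repo argument is available for
// per-repo routing but is unused in this trivial example.
func (s *countingScheduler) AddWork(ctx context.Context, repo string, val *events.XRPCStreamEvent) error {
	s.itemsAdded.Inc()
	err := s.do(ctx, val)
	s.itemsProcessed.Inc()
	return err
}
```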
events/metrics.go  (-15)

···
 	Help: "Total bytes received from the stream",
 }, []string{"remote_addr"})

-var workItemsAdded = promauto.NewCounterVec(prometheus.CounterOpts{
-	Name: "indigo_work_items_added_total",
-	Help: "Total number of work items added to the consumer pool",
-}, []string{"pool"})
-
-var workItemsProcessed = promauto.NewCounterVec(prometheus.CounterOpts{
-	Name: "indigo_work_items_processed_total",
-	Help: "Total number of work items processed by the consumer pool",
-}, []string{"pool"})
-
-var workItemsActive = promauto.NewCounterVec(prometheus.CounterOpts{
-	Name: "indigo_work_items_active_total",
-	Help: "Total number of work items passed into a worker",
-}, []string{"pool"})
-
 var eventsEnqueued = promauto.NewCounterVec(prometheus.CounterOpts{
 	Name: "indigo_events_enqueued_for_broadcast_total",
 	Help: "Total number of events enqueued to broadcast to subscribers",
events/parallel.go  (-110, deleted)

-package events
-
-import (
-	"context"
-	"sync"
-)
-
-type Scheduler interface {
-	AddWork(ctx context.Context, repo string, val *XRPCStreamEvent) error
-}
-
-type SequentialScheduler struct {
-	Do func(context.Context, *XRPCStreamEvent) error
-}
-
-func (s *SequentialScheduler) AddWork(ctx context.Context, repo string, val *XRPCStreamEvent) error {
-	return s.Do(ctx, val)
-}
-
-type ParallelConsumerPool struct {
-	maxConcurrency int
-	maxQueue       int
-
-	do func(context.Context, *XRPCStreamEvent) error
-
-	feeder chan *consumerTask
-
-	lk     sync.Mutex
-	active map[string][]*consumerTask
-
-	ident string
-}
-
-func NewConsumerPool(maxC, maxQ int, ident string, do func(context.Context, *XRPCStreamEvent) error) *ParallelConsumerPool {
-	p := &ParallelConsumerPool{
-		maxConcurrency: maxC,
-		maxQueue:       maxQ,
-
-		do: do,
-
-		feeder: make(chan *consumerTask),
-		active: make(map[string][]*consumerTask),
-
-		ident: ident,
-	}
-
-	for i := 0; i < maxC; i++ {
-		go p.worker()
-	}
-
-	return p
-}
-
-type consumerTask struct {
-	repo string
-	val  *XRPCStreamEvent
-}
-
-func (p *ParallelConsumerPool) AddWork(ctx context.Context, repo string, val *XRPCStreamEvent) error {
-	workItemsAdded.WithLabelValues(p.ident).Inc()
-	t := &consumerTask{
-		repo: repo,
-		val:  val,
-	}
-	p.lk.Lock()
-
-	a, ok := p.active[repo]
-	if ok {
-		p.active[repo] = append(a, t)
-		p.lk.Unlock()
-		return nil
-	}
-
-	p.active[repo] = []*consumerTask{}
-	p.lk.Unlock()
-
-	select {
-	case p.feeder <- t:
-		return nil
-	case <-ctx.Done():
-		return ctx.Err()
-	}
-}
-
-func (p *ParallelConsumerPool) worker() {
-	for work := range p.feeder {
-		for work != nil {
-			workItemsActive.WithLabelValues(p.ident).Inc()
-			if err := p.do(context.TODO(), work.val); err != nil {
-				log.Errorf("event handler failed: %s", err)
-			}
-			workItemsProcessed.WithLabelValues(p.ident).Inc()
-
-			p.lk.Lock()
-			rem, ok := p.active[work.repo]
-			if !ok {
-				log.Errorf("should always have an 'active' entry if a worker is processing a job")
-			}
-
-			if len(rem) == 0 {
-				delete(p.active, work.repo)
-				work = nil
-			} else {
-				work = rem[0]
-				p.active[work.repo] = rem[1:]
-			}
-			p.lk.Unlock()
-		}
-	}
-}
events/schedulers/metrics.go  (+26, new)

+package schedulers
+
+import (
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promauto"
+)
+
+var WorkItemsAdded = promauto.NewCounterVec(prometheus.CounterOpts{
+	Name: "indigo_scheduler_work_items_added_total",
+	Help: "Total number of work items added to the consumer pool",
+}, []string{"pool", "scheduler_type"})
+
+var WorkItemsProcessed = promauto.NewCounterVec(prometheus.CounterOpts{
+	Name: "indigo_scheduler_work_items_processed_total",
+	Help: "Total number of work items processed by the consumer pool",
+}, []string{"pool", "scheduler_type"})
+
+var WorkItemsActive = promauto.NewCounterVec(prometheus.CounterOpts{
+	Name: "indigo_scheduler_work_items_active_total",
+	Help: "Total number of work items passed into a worker",
+}, []string{"pool", "scheduler_type"})
+
+var WorkersActive = promauto.NewGaugeVec(prometheus.GaugeOpts{
+	Name: "indigo_scheduler_workers_active",
+	Help: "Number of workers currently active",
+}, []string{"pool", "scheduler_type"})
events/schedulers/parallel/pool.go  (+117, new)

+package parallel
+
+import (
+	"context"
+	"sync"
+
+	"github.com/bluesky-social/indigo/events"
+	"github.com/bluesky-social/indigo/events/schedulers"
+	"github.com/labstack/gommon/log"
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+// Scheduler is a parallel scheduler that will run work on a fixed number of workers
+type Scheduler struct {
+	maxConcurrency int
+	maxQueue       int
+
+	do func(context.Context, *events.XRPCStreamEvent) error
+
+	feeder chan *consumerTask
+
+	lk     sync.Mutex
+	active map[string][]*consumerTask
+
+	ident string
+
+	// metrics
+	itemsAdded     prometheus.Counter
+	itemsProcessed prometheus.Counter
+	itemsActive    prometheus.Counter
+	workesActive   prometheus.Gauge
+}
+
+func NewScheduler(maxC, maxQ int, ident string, do func(context.Context, *events.XRPCStreamEvent) error) *Scheduler {
+	p := &Scheduler{
+		maxConcurrency: maxC,
+		maxQueue:       maxQ,
+
+		do: do,
+
+		feeder: make(chan *consumerTask),
+		active: make(map[string][]*consumerTask),
+
+		ident: ident,
+
+		itemsAdded:     schedulers.WorkItemsAdded.WithLabelValues(ident, "parallel"),
+		itemsProcessed: schedulers.WorkItemsProcessed.WithLabelValues(ident, "parallel"),
+		itemsActive:    schedulers.WorkItemsActive.WithLabelValues(ident, "parallel"),
+		workesActive:   schedulers.WorkersActive.WithLabelValues(ident, "parallel"),
+	}
+
+	for i := 0; i < maxC; i++ {
+		go p.worker()
+	}
+
+	p.workesActive.Set(float64(maxC))
+
+	return p
+}
+
+type consumerTask struct {
+	repo string
+	val  *events.XRPCStreamEvent
+}
+
+func (p *Scheduler) AddWork(ctx context.Context, repo string, val *events.XRPCStreamEvent) error {
+	p.itemsAdded.Inc()
+	t := &consumerTask{
+		repo: repo,
+		val:  val,
+	}
+	p.lk.Lock()
+
+	a, ok := p.active[repo]
+	if ok {
+		p.active[repo] = append(a, t)
+		p.lk.Unlock()
+		return nil
+	}
+
+	p.active[repo] = []*consumerTask{}
+	p.lk.Unlock()
+
+	select {
+	case p.feeder <- t:
+		return nil
+	case <-ctx.Done():
+		return ctx.Err()
+	}
+}
+
+func (p *Scheduler) worker() {
+	for work := range p.feeder {
+		for work != nil {
+			p.itemsActive.Inc()
+			if err := p.do(context.TODO(), work.val); err != nil {
+				log.Errorf("event handler failed: %s", err)
+			}
+			p.itemsProcessed.Inc()
+
+			p.lk.Lock()
+			rem, ok := p.active[work.repo]
+			if !ok {
+				log.Errorf("should always have an 'active' entry if a worker is processing a job")
+			}
+
+			if len(rem) == 0 {
+				delete(p.active, work.repo)
+				work = nil
+			} else {
+				work = rem[0]
+				p.active[work.repo] = rem[1:]
+			}
+			p.lk.Unlock()
+		}
+	}
+}
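
The parallel scheduler preserves per-repo ordering: `AddWork` appends events for a repo that already has a task in flight to that repo's queue, and the worker drains that queue before taking new work from the feeder. No call site in this change constructs it directly (search/server.go moves to the autoscaling scheduler instead), so the wiring sketch below is hedged; the handler body, connection, and ident string are illustrative.

```go
package exampleconsumer

import (
	"context"
	"log"

	"github.com/bluesky-social/indigo/events"
	"github.com/bluesky-social/indigo/events/schedulers/parallel"
	"github.com/gorilla/websocket"
)

// consume drives a repo stream with a fixed-size worker pool.
func consume(ctx context.Context, con *websocket.Conn) error {
	handler := func(ctx context.Context, evt *events.XRPCStreamEvent) error {
		// A real consumer would dispatch on the event type here.
		log.Println("received stream event")
		return nil
	}

	// 8 workers, queue hint of 32, labelled "example-consumer" in the scheduler metrics.
	sched := parallel.NewScheduler(8, 32, "example-consumer", handler)
	return events.HandleRepoStream(ctx, con, sched)
}
```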
events/schedulers/scheduler.go  (+1, new)

+package schedulers
events/schedulers/sequential/sequential.go  (+47, new)

+package sequential
+
+import (
+	"context"
+
+	"github.com/bluesky-social/indigo/events"
+	"github.com/bluesky-social/indigo/events/schedulers"
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+// Scheduler is a sequential scheduler that will run work on a single worker
+type Scheduler struct {
+	Do func(context.Context, *events.XRPCStreamEvent) error
+
+	ident string
+
+	// metrics
+	itemsAdded     prometheus.Counter
+	itemsProcessed prometheus.Counter
+	itemsActive    prometheus.Counter
+	workersActive  prometheus.Gauge
+}
+
+func NewScheduler(ident string, do func(context.Context, *events.XRPCStreamEvent) error) *Scheduler {
+	p := &Scheduler{
+		Do: do,
+
+		ident: ident,
+
+		itemsAdded:     schedulers.WorkItemsAdded.WithLabelValues(ident, "sequential"),
+		itemsProcessed: schedulers.WorkItemsProcessed.WithLabelValues(ident, "sequential"),
+		itemsActive:    schedulers.WorkItemsActive.WithLabelValues(ident, "sequential"),
+		workersActive:  schedulers.WorkersActive.WithLabelValues(ident, "sequential"),
+	}
+
+	p.workersActive.Set(1)
+
+	return p
+}
+
+func (s *Scheduler) AddWork(ctx context.Context, repo string, val *events.XRPCStreamEvent) error {
+	s.itemsAdded.Inc()
+	s.itemsActive.Inc()
+	err := s.Do(ctx, val)
+	s.itemsProcessed.Inc()
+	return err
+}
search/server.go  (+3 -1)

···
 	"net/http"
 	"strconv"
 	"strings"
+	"time"

 	api "github.com/bluesky-social/indigo/api"
 	comatproto "github.com/bluesky-social/indigo/api/atproto"
 	bsky "github.com/bluesky-social/indigo/api/bsky"
 	"github.com/bluesky-social/indigo/events"
+	"github.com/bluesky-social/indigo/events/schedulers/autoscaling"
 	lexutil "github.com/bluesky-social/indigo/lex/util"
 	"github.com/bluesky-social/indigo/repo"
 	"github.com/bluesky-social/indigo/repomgr"
···
 		},
 	}

-	return events.HandleRepoStream(ctx, con, events.NewConsumerPool(8, 32, s.bgshost, rsc.EventHandler))
+	return events.HandleRepoStream(ctx, con, autoscaling.NewScheduler(1, 32, time.Second, s.bgshost, rsc.EventHandler))
 }

 func (s *Server) handleOp(ctx context.Context, op repomgr.EventKind, seq int64, path string, did string, rcid *cid.Cid, rec any) error {
testing/labelmaker_fakedata_test.go  (+3 -1)

···
 	label "github.com/bluesky-social/indigo/api/label"
 	"github.com/bluesky-social/indigo/carstore"
 	"github.com/bluesky-social/indigo/events"
+	"github.com/bluesky-social/indigo/events/schedulers/sequential"
 	"github.com/bluesky-social/indigo/labeler"
 	"github.com/bluesky-social/indigo/util"
 	"github.com/bluesky-social/indigo/xrpc"
···
 			return nil
 		},
 	}
-	if err := events.HandleRepoStream(ctx, con, &events.SequentialScheduler{rsc.EventHandler}); err != nil {
+	seqScheduler := sequential.NewScheduler("test", rsc.EventHandler)
+	if err := events.HandleRepoStream(ctx, con, seqScheduler); err != nil {
 		fmt.Println(err)
 	}
 }()
testing/utils.go  (+3 -1)

···
 	"github.com/bluesky-social/indigo/bgs"
 	"github.com/bluesky-social/indigo/carstore"
 	"github.com/bluesky-social/indigo/events"
+	"github.com/bluesky-social/indigo/events/schedulers/sequential"
 	"github.com/bluesky-social/indigo/indexer"
 	lexutil "github.com/bluesky-social/indigo/lex/util"
 	"github.com/bluesky-social/indigo/models"
···
 			return nil
 		},
 	}
-	if err := events.HandleRepoStream(ctx, con, &events.SequentialScheduler{rsc.EventHandler}); err != nil {
+	seqScheduler := sequential.NewScheduler("test", rsc.EventHandler)
+	if err := events.HandleRepoStream(ctx, con, seqScheduler); err != nil {
 		fmt.Println(err)
 	}
 }()