
relay: code tweaks, README, and helper scripts for self-hosting (#689)

- Makefile command to build dashboard
- small refactors and simplifications of bigsky env var and CLI arg
parsing; should be safe to merge and deploy to prod
- disable some Relay features by default when bigsky is run (like
"aggregations" and "spidering")

The helper bash scripts somewhat duplicate existing `gosky` commands and
aren't critical; I can yank them if they seem too redundant.
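
Concretely, per the `cmd/bigsky/main.go` diff below, the defaults change roughly like this (invocations illustrative):

    # spidering is now opt-in (default false); enable via flag or env var
    go run ./cmd/bigsky/ --spidering
    RELAY_SPIDERING=true go run ./cmd/bigsky/

    # "aggregation" is no longer a flag at all; the indexer is now
    # constructed with aggregation hard-disabled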

authored by bnewbold.net and committed by GitHub · d5f797ca d4ddb540 · +261 -49
.gitignore  (+3)

···
 out/
 state.json
 netsync-out/
+
+# Relay dash output
+/public/
Makefile  (+6)

···
 build-relay-image: ## Builds 'bigsky' Relay docker image
 	docker build -t bigsky -f cmd/bigsky/Dockerfile .

+.PHONY: build-relay-ui
+build-relay-ui: ## Build Relay dash web app
+	cd ts/bgs-dash; yarn install --frozen-lockfile; yarn build
+	mkdir -p public
+	cp -r ts/bgs-dash/dist/* public/
+
 .PHONY: run-relay-image
 run-relay-image:
 	docker run -p 2470:2470 bigsky /bigsky --admin-key localdev
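
For context, the new target slots into the existing dev workflow roughly like this (the output lands in `./public/`, which the `.gitignore` change above excludes from version control):

    # build the dashboard bundle into ./public/
    make build-relay-ui

    # then run the relay; it serves the bundle from ./public/ at /dash
    make run-dev-relay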
README.md  (+1)

···
 - **bigsky** ([README](./cmd/bigsky/README.md)): "Big Graph Service" (BGS) reference implementation, running at `bsky.network`
 - **palomar** ([README](./cmd/palomar/README.md)): fulltext search service for <https://bsky.app>
+- **hepa** ([README](./cmd/hepa/README.md)): auto-moderation bot for [Ozone](https://ozone.tools)

 **Go Packages:**
bgs/bgs.go  (+4 -3)

···
 	// React uses a virtual router, so we need to serve the index.html for all
 	// routes that aren't otherwise handled or in the /assets directory.
-	e.File("/dash", "/public/index.html")
-	e.File("/dash/*", "/public/index.html")
-	e.Static("/assets", "/public/assets")
+	e.File("/dash", "public/index.html")
+	e.File("/dash/*", "public/index.html")
+	e.Static("/assets", "public/assets")

 	e.Use(MetricsMiddleware)
···
 	e.GET("/xrpc/com.atproto.sync.getLatestCommit", bgs.HandleComAtprotoSyncGetLatestCommit)
 	e.GET("/xrpc/com.atproto.sync.notifyOfUpdate", bgs.HandleComAtprotoSyncNotifyOfUpdate)
 	e.GET("/xrpc/_health", bgs.HandleHealthCheck)
+	e.GET("/_health", bgs.HandleHealthCheck)

 	admin := e.Group("/admin", bgs.checkAdminAuth)
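
The new unversioned route means the same health handler now answers with and without the XRPC path prefix, which is handy for load balancers that expect a bare probe path; for example, against a local dev instance:

    # both routes are wired to the same HandleHealthCheck
    curl -i http://localhost:2470/xrpc/_health
    curl -i http://localhost:2470/_health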
cmd/bigsky/README.md  (+131 -12)

···
 
-bigsky: Big Graph Server (BGS), aka Relay
-=========================================
+`bigsky`: atproto Relay Service
+===============================
+
+*NOTE: "Relays" used to be called "Big Graph Servers", or "BGS", which inspired the name "bigsky". Many variables and packages still reference "bgs".*
+
+This is the implementation of an atproto Relay which is running in the production network, written and operated by Bluesky.
+
+In atproto, a Relay subscribes to multiple PDS hosts and outputs a combined "firehose" event stream. Downstream services can subscribe to this single firehose and get all relevant events for the entire network, or a specific sub-graph of the network. The Relay maintains a mirror of repo data from all accounts on the upstream PDS instances, and verifies repo data structure integrity and identity signatures. It is agnostic to applications, and does not validate data against atproto Lexicon schemas.
+
+This Relay implementation is designed to subscribe to the entire global network. The current state of the codebase is informally expected to scale to around 20 million accounts in the network, and thousands of repo events per second (peak).
+
+Features and design decisions:
+
+- runs on a single server
+- repo data: stored on-disk in individual CAR "slice" files, with metadata in SQL; the filesystem must accommodate tens of millions of small files
+- firehose backfill data: stored on-disk by default, with metadata in SQL
+- crawling and account state: stored in SQL database
+- SQL driver: gorm, with PostgreSQL in production and sqlite for testing
+- disk I/O intensive: fast NVMe disks are recommended, and RAM is helpful for caching
+- highly concurrent: not particularly CPU intensive
+- single golang binary for easy deployment
+- observability: logging, prometheus metrics, OTEL traces
+- "spidering" feature to auto-discover new accounts (DIDs)
+- ability to export/import lists of DIDs to "backfill" Relay instances
+- periodic repo compaction
+- admin web interface: configure limits, add upstream PDS instances, etc
+
+This software is not as packaged, documented, and supported for self-hosting as our PDS distribution or Ozone service. But it is relatively simple and inexpensive to get running.
+
+A note and reminder about Relays in general: they are more of a convenience in the protocol than a hard requirement. The "firehose" API is the exact same on the PDS and on a Relay. Any service which subscribes to the Relay could instead connect to one or more PDS instances directly.
+
+
+## Development Tips
+
+The README and Makefile at the top level of this git repo have some generic helpers for testing, linting, formatting code, etc.
+
+To re-build and run the bigsky Relay locally:
+
+    make run-dev-relay
+
+You can re-build and run the command directly to get a list of configuration flags and env vars; env vars will be loaded from `.env` if that file exists:
+
+    RELAY_ADMIN_KEY=localdev go run ./cmd/bigsky/ --help
+
+By default, the daemon will use sqlite for databases (in the directory `./data/bigsky/`), CAR data will be stored as individual shard files in `./data/bigsky/carstore/`, and the HTTP API will be bound to localhost port 2470.
+
+When the daemon isn't running, sqlite database files can be inspected with:
+
+    sqlite3 data/bigsky/bgs.sqlite
+    [...]
+    sqlite> .schema
+
+Wipe all local data:
+
+    # careful! double-check this destructive command
+    rm -rf ./data/bigsky/*
+
+There is a basic web dashboard, though it will not be included unless built and copied to a local directory `./public/`. Run `make build-relay-ui`, and then when running the daemon the dashboard will be available at: <http://localhost:2470/dash/>. Paste in the admin key, eg `localdev`.
+
+The local admin routes can also be accessed by passing the admin key as a bearer token, for example:
+
+    http get :2470/admin/pds/list Authorization:"Bearer localdev"
+
+Request crawl of an individual PDS instance like:
+
+    http post :2470/admin/pds/requestCrawl Authorization:"Bearer localdev" hostname=pds.example.com
+
+
+## Docker Containers
+
+One way to deploy is running a docker image. You can pull and/or run a specific version of bigsky, referenced by git commit, from the Bluesky Github container registry. For example:
+
+    docker pull ghcr.io/bluesky-social/indigo:bigsky-fd66f93ce1412a3678a1dd3e6d53320b725978a6
+    docker run ghcr.io/bluesky-social/indigo:bigsky-fd66f93ce1412a3678a1dd3e6d53320b725978a6
+
+There is a Dockerfile in this directory, which can be used to build customized/patched versions of the Relay as a container, republish them, run locally, deploy to servers, deploy to an orchestrated cluster, etc. See docs and guides for docker and cluster management systems for details.
+
 
 ## Database Setup
 
-PostgreSQL and Sqlite are both supported. When using Sqlite, separate database
-for the BGS database itself and the CarStore are used. With PostgreSQL a single
-database server, user, and database, can all be reused.
+PostgreSQL and Sqlite are both supported. When using Sqlite, separate files are used for Relay metadata and CarStore metadata. With PostgreSQL a single database server, user, and logical database can all be reused: table names will not conflict.
 
-Database configuration is passed via the `DATABASE_URL` and
-`CARSTORE_DATABASE_URL` environment variables, or the corresponding CLI args.
+Database configuration is passed via the `DATABASE_URL` and `CARSTORE_DATABASE_URL` environment variables, or the corresponding CLI args.
 
-For PostgreSQL, the user and database must already be configured. Some example
-SQL commands are:
+For PostgreSQL, the user and database must already be configured. Some example SQL commands are:
 
     CREATE DATABASE bgs;
     CREATE DATABASE carstore;
···
     GRANT ALL PRIVILEGES ON DATABASE bgs TO ${username};
     GRANT ALL PRIVILEGES ON DATABASE carstore TO ${username};
 
-This service currently uses `gorm` to automatically run database migrations as
-the regular user. There is no concept of running a separate set of migrations
-under more privileged database user.
+This service currently uses `gorm` to automatically run database migrations as the regular user. There is no concept of running a separate set of migrations under a more privileged database user.
+
+
+## Deployment
+
+*NOTE: this is not a complete guide to operating a Relay. There are decisions to be made and communicated about policies, bandwidth use, PDS crawling and rate-limits, financial sustainability, etc, which are not covered here. This is just a quick overview of how to technically get a relay up and running.*
+
+In a real-world system, you will probably want to use PostgreSQL for both the relay database and the carstore database. CAR shards will still be stored on-disk, resulting in many millions of files. Choose your storage hardware and filesystem carefully: we recommend XFS on local NVMe, not network-backed block storage (eg, not EBS volumes on AWS).
+
+Some notable configuration env vars to set:
+
+- `ENVIRONMENT`: eg, `production`
+- `DATABASE_URL`: see section below
+- `CARSTORE_DATABASE_URL`: see section below
+- `DATA_DIR`: CAR shards will be stored in a subdirectory
+- `GOLOG_LOG_LEVEL`: log verbosity
+- `RESOLVE_ADDRESS`: DNS server to use
+- `FORCE_DNS_UDP`: recommend "true"
+- `BGS_COMPACT_INTERVAL`: to control CAR compaction scheduling. For example, "8h" (every 8 hours). Set to "0" to disable automatic compaction.
+
+There is a health check endpoint at `/xrpc/_health`. Prometheus metrics are exposed by default on port 2471, path `/metrics`. The service logs fairly verbosely to stderr; use `GOLOG_LOG_LEVEL` to control log volume.
+
+As a rough guideline for the compute resources needed to run a full-network Relay, in June 2024 an example Relay for over 5 million repositories used:
+
+- around 30 million inodes (files)
+- roughly 1 TByte of disk for PostgreSQL
+- roughly 1 TByte of disk for CAR shard storage
+- roughly 5k disk I/O operations per second (all combined)
+- roughly 100% of one CPU core (quite low CPU utilization)
+- roughly 5GB of RAM for bigsky, and as much RAM as available for PostgreSQL and page cache
+- on the order of 1 megabit inbound bandwidth (crawling PDS instances) and 1 megabit outbound per connected client. 1 mbit continuous is approximately 350 GByte/month
+
+Be sure to double-check bandwidth usage and pricing if running a public relay! Bandwidth prices can vary widely between providers, and popular cloud services (AWS, Google Cloud, Azure) are very expensive compared to alternatives like OVH or Hetzner.
+
+
+## Bootstrapping the Network
+
+To bootstrap the entire network, you'll want to start with a list of large PDS instances to backfill from. You could pull from a public dashboard of instances (like [mackuba's](https://blue.mackuba.eu/directory/pdses)), or scrape the full DID PLC directory, parse out all PDS service declarations, and sort by count.
+
+Once you have a set of PDS hosts, you can put the bare hostnames (not URLs: no `https://` prefix, port, or path suffix) in a `hosts.txt` file, and then use the `crawl_pds.sh` script to backfill and configure limits for all of them:
+
+    export RELAY_HOST=your.relay.hostname.tld
+    export RELAY_ADMIN_KEY=your-secret-key
+
+    # both request crawl, and set generous crawl limits for each
+    cat hosts.txt | parallel -j1 ./crawl_pds.sh {}
+
+Just consuming from the firehose for a few hours will only backfill accounts with activity during that period. This is fine to get the backfill process started, but eventually you'll want to do a full "resync" of all the repositories on the PDS host to the most recent repo rev version. To enqueue that for all the PDS instances:
+
+    # start sync/backfill of all accounts
+    cat hosts.txt | parallel -j1 ./sync_pds.sh {}
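
The README names `DATABASE_URL` and `CARSTORE_DATABASE_URL` but never shows the URL shape; a plausible sketch, assuming standard libpq-style connection URLs (the user, password, and hostname below are placeholders matching the example SQL above):

    # assumed URL format; adjust user/password/host for your deployment
    export DATABASE_URL="postgresql://bigsky:CHANGEME@localhost:5432/bgs"
    export CARSTORE_DATABASE_URL="postgresql://bigsky:CHANGEME@localhost:5432/carstore"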
cmd/bigsky/crawl_pds.sh  (+33)

···
+#!/usr/bin/env bash
+
+set -e          # fail on error
+set -u          # fail if variable not set in substitution
+set -o pipefail # fail if part of a '|' command fails
+
+if test -z "${RELAY_ADMIN_KEY:-}"; then
+    echo "RELAY_ADMIN_KEY secret is not defined"
+    exit 1
+fi
+
+if test -z "${RELAY_HOST:-}"; then
+    echo "RELAY_HOST config not defined"
+    exit 1
+fi
+
+if test -z "${1:-}"; then
+    echo "expected PDS hostname as an argument"
+    exit 1
+fi
+
+echo "requestCrawl $1"
+http --quiet --ignore-stdin post https://${RELAY_HOST}/admin/pds/requestCrawl Authorization:"Bearer ${RELAY_ADMIN_KEY}" \
+    hostname=$1
+
+echo "changeLimits $1"
+http --quiet --ignore-stdin post https://${RELAY_HOST}/admin/pds/changeLimits Authorization:"Bearer ${RELAY_ADMIN_KEY}" \
+    per_second:=100 \
+    per_hour:=1000000 \
+    per_day:=1000000 \
+    crawl_rate:=10 \
+    repo_limit:=1000000 \
+    host=$1
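
A usage sketch for the script above; it depends on HTTPie (`http`) and expects the relay hostname and admin key in the environment (hostnames here are placeholders):

    export RELAY_HOST=relay.example.com
    export RELAY_ADMIN_KEY=your-secret-key

    # request crawl of one PDS and raise its rate limits
    ./crawl_pds.sh pds.example.com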
cmd/bigsky/main.go  (+56 -31)

···
 import (
 	"context"
 	"fmt"
+	"net/http"
+	_ "net/http/pprof"
 	"os"
 	"os/signal"
 	"path/filepath"
···
 	"github.com/bluesky-social/indigo/util"
 	"github.com/bluesky-social/indigo/util/cliutil"
 	"github.com/bluesky-social/indigo/xrpc"
-	_ "go.uber.org/automaxprocs"
-
-	"net/http"
-	_ "net/http/pprof"

 	_ "github.com/joho/godotenv/autoload"
+	_ "go.uber.org/automaxprocs"

 	"github.com/carlmjohnson/versioninfo"
 	logging "github.com/ipfs/go-log"
···
 }

 func main() {
-	run(os.Args)
+	if err := run(os.Args); err != nil {
+		log.Fatal(err)
+	}
 }

-func run(args []string) {
+func run(args []string) error {
+
 	app := cli.App{
 		Name:    "bigsky",
-		Usage:   "atproto BGS/firehose daemon",
+		Usage:   "atproto Relay daemon",
 		Version: versioninfo.Short(),
 	}
···
 			Usage: "when connecting to PDS instances, use ws:// instead of wss://",
 		},
 		&cli.BoolFlag{
-			Name:  "aggregation",
-			Value: false,
-		},
-		&cli.BoolFlag{
 			Name:    "spidering",
-			Value:   true,
-			EnvVars: []string{"BGS_SPIDERING"},
+			Value:   false,
+			EnvVars: []string{"RELAY_SPIDERING", "BGS_SPIDERING"},
 		},
 		&cli.StringFlag{
 			Name: "api-listen",
···
 		&cli.StringFlag{
 			Name:    "metrics-listen",
 			Value:   ":2471",
-			EnvVars: []string{"BGS_METRICS_LISTEN"},
+			EnvVars: []string{"RELAY_METRICS_LISTEN", "BGS_METRICS_LISTEN"},
 		},
 		&cli.StringFlag{
 			Name: "disk-persister-dir",
 		},
 		&cli.StringFlag{
 			Name:    "admin-key",
-			EnvVars: []string{"BGS_ADMIN_KEY"},
+			EnvVars: []string{"RELAY_ADMIN_KEY", "BGS_ADMIN_KEY"},
 		},
 		&cli.StringSliceFlag{
 			Name: "handle-resolver-hosts",
···
 		&cli.DurationFlag{
 			Name:    "compact-interval",
-			EnvVars: []string{"BGS_COMPACT_INTERVAL"},
+			EnvVars: []string{"RELAY_COMPACT_INTERVAL", "BGS_COMPACT_INTERVAL"},
 			Value:   4 * time.Hour,
 			Usage:   "interval between compaction runs, set to 0 to disable scheduled compaction",
 		},
···
 			Value:   100,
 			EnvVars: []string{"MAX_FETCH_CONCURRENCY"},
 		},
+		&cli.StringFlag{
+			Name:    "env",
+			Value:   "dev",
+			EnvVars: []string{"ENVIRONMENT"},
+			Usage:   "declared hosting environment (prod, qa, etc); used in metrics",
+		},
+		&cli.StringFlag{
+			Name:    "otel-exporter-otlp-endpoint",
+			EnvVars: []string{"OTEL_EXPORTER_OTLP_ENDPOINT"},
+		},
+		&cli.StringFlag{
+			Name:    "bsky-social-rate-limit-skip",
+			EnvVars: []string{"BSKY_SOCIAL_RATE_LIMIT_SKIP"},
+			Usage:   "ratelimit bypass secret token for *.bsky.social domains",
+		},
 	}

-	app.Action = Bigsky
-	err := app.Run(os.Args)
-	if err != nil {
-		log.Fatal(err)
-	}
+	app.Action = runBigsky
+	return app.Run(os.Args)
 }

-func Bigsky(cctx *cli.Context) error {
-	// Trap SIGINT to trigger a shutdown.
-	signals := make(chan os.Signal, 1)
-	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
+func setupOTEL(cctx *cli.Context) error {

+	env := cctx.String("env")
+	if env == "" {
+		env = "dev"
+	}
 	if cctx.Bool("jaeger") {
 		url := "http://localhost:14268/api/traces"
 		exp, err := jaeger.New(jaeger.WithCollectorEndpoint(jaeger.WithEndpoint(url)))
···
 			tracesdk.WithResource(resource.NewWithAttributes(
 				semconv.SchemaURL,
 				semconv.ServiceNameKey.String("bgs"),
-				attribute.String("environment", "test"),
+				attribute.String("env", env),         // DataDog
+				attribute.String("environment", env), // Others
 				attribute.Int64("ID", 1),
 			)),
 		)
···
 	// https://pkg.go.dev/go.opentelemetry.io/otel/exporters/otlp/otlptrace#readme-environment-variables
 	// At a minimum, you need to set
 	// OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
-	if ep := os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT"); ep != "" {
+	if ep := cctx.String("otel-exporter-otlp-endpoint"); ep != "" {
 		log.Infow("setting up trace exporter", "endpoint", ep)
 		ctx, cancel := context.WithCancel(context.Background())
 		defer cancel()
···
 			tracesdk.WithResource(resource.NewWithAttributes(
 				semconv.SchemaURL,
 				semconv.ServiceNameKey.String("bgs"),
-				attribute.String("env", os.Getenv("ENVIRONMENT")),         // DataDog
-				attribute.String("environment", os.Getenv("ENVIRONMENT")), // Others
+				attribute.String("env", env),         // DataDog
+				attribute.String("environment", env), // Others
 				attribute.Int64("ID", 1),
 			)),
 		)
 		otel.SetTracerProvider(tp)
+	}
+
+	return nil
+}
+
+func runBigsky(cctx *cli.Context) error {
+	// Trap SIGINT to trigger a shutdown.
+	signals := make(chan os.Signal, 1)
+	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
+
+	// start observability/tracing (OTEL and jaeger)
+	if err := setupOTEL(cctx); err != nil {
+		return err
 	}

 	// ensure data directory exists; won't error if it does
···

 	rf := indexer.NewRepoFetcher(db, repoman, cctx.Int("max-fetch-concurrency"))

-	ix, err := indexer.NewIndexer(db, notifman, evtman, cachedidr, rf, true, cctx.Bool("spidering"), cctx.Bool("aggregation"))
+	ix, err := indexer.NewIndexer(db, notifman, evtman, cachedidr, rf, true, cctx.Bool("spidering"), false)
 	if err != nil {
 		return err
 	}

-	rlskip := os.Getenv("BSKY_SOCIAL_RATE_LIMIT_SKIP")
+	rlskip := cctx.String("bsky-social-rate-limit-skip")
 	ix.ApplyPDSClientSettings = func(c *xrpc.Client) {
 		if strings.HasSuffix(c.Host, ".bsky.network") {
 			if c.Client == nil {
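
A note on the renamed env vars above: urfave/cli resolves the `EnvVars` list in order and uses the first variable that is set, so the new `RELAY_*` names take precedence over the legacy `BGS_*` names when both are defined. For example:

    # both set: the RELAY_* name is listed first, so it wins
    export BGS_ADMIN_KEY=old-secret     # legacy name, still honored on its own
    export RELAY_ADMIN_KEY=new-secret   # takes precedence
    go run ./cmd/bigsky/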
cmd/bigsky/sync_pds.sh  (+24)

···
+#!/usr/bin/env bash
+
+set -e          # fail on error
+set -u          # fail if variable not set in substitution
+set -o pipefail # fail if part of a '|' command fails
+
+if test -z "${RELAY_ADMIN_KEY:-}"; then
+    echo "RELAY_ADMIN_KEY secret is not defined"
+    exit 1
+fi
+
+if test -z "${RELAY_HOST:-}"; then
+    echo "RELAY_HOST config not defined"
+    exit 1
+fi
+
+if test -z "${1:-}"; then
+    echo "expected PDS hostname as an argument"
+    exit 1
+fi
+
+echo "resync $1"
+http --quiet post https://${RELAY_HOST}/admin/pds/resync Authorization:"Bearer ${RELAY_ADMIN_KEY}" \
+    host=$1
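
Same calling convention as `crawl_pds.sh`; a hypothetical single-host invocation (a resync walks every repo on the PDS, so expect it to run for a while):

    export RELAY_HOST=relay.example.com
    export RELAY_ADMIN_KEY=your-secret-key
    ./sync_pds.sh pds.example.com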
indexer/indexer.go  (+3 -3)

···
 	case *bsky.FeedGenerator:
 		return nil
 	default:
-		log.Warnf("unrecognized record type: %T", op.Record)
+		log.Warnw("unrecognized record type (crawling references)", "record", op.Record, "collection", op.Collection)
 		return nil
 	}
 }
···
 	case *bsky.ActorProfile:
 		log.Debugf("TODO: got actor profile record creation, need to do something with this")
 	default:
-		return nil, fmt.Errorf("unrecognized record type: %T", rec)
+		return nil, fmt.Errorf("unrecognized record type (creation): %s", op.Collection)
 	}

 	return out, nil
···
 	case *bsky.ActorProfile:
 		log.Debugf("TODO: got actor profile record update, need to do something with this")
 	default:
-		return fmt.Errorf("unrecognized record type: %T", rec)
+		return fmt.Errorf("unrecognized record type (update): %s", op.Collection)
 	}

 	return nil