fork of indigo with slightly nicer lexgen

Merge branch 'main' into escalate-on-high-churn

Changed files: +5231 −1016

Top-level paths touched: .github, api, atproto, automod, backfill, bgs, carstore, cmd, did, events, fakedata, indexer, lex, mst, pds, plc, repomgr, search, splitter, testing, util
+52
.github/workflows/container-rainbow-aws.yaml
name: container-rainbow-aws
on: [push]
env:
  REGISTRY: ${{ secrets.AWS_ECR_REGISTRY_USEAST2_PACKAGES_REGISTRY }}
  USERNAME: ${{ secrets.AWS_ECR_REGISTRY_USEAST2_PACKAGES_USERNAME }}
  PASSWORD: ${{ secrets.AWS_ECR_REGISTRY_USEAST2_PACKAGES_PASSWORD }}
  # github.repository as <account>/<repo>
  IMAGE_NAME: rainbow

jobs:
  container-rainbow-aws:
    if: github.repository == 'bluesky-social/indigo'
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
      id-token: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Setup Docker buildx
        uses: docker/setup-buildx-action@v1

      - name: Log into registry ${{ env.REGISTRY }}
        uses: docker/login-action@v2
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ env.USERNAME }}
          password: ${{ env.PASSWORD }}

      - name: Extract Docker metadata
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: |
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=sha,enable=true,priority=100,prefix=,suffix=,format=long

      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v4
        with:
          context: .
          file: ./cmd/rainbow/Dockerfile
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
+2 -2
.github/workflows/golang.yml
···
      - name: Set up Go tooling
        uses: actions/setup-go@v4
        with:
-         go-version: "1.22"
+         go-version: "1.23"
      - name: Build
        run: make build
      - name: Test
···
      - name: Set up Go tooling
        uses: actions/setup-go@v4
        with:
-         go-version: "1.22"
+         go-version: "1.23"
      - name: Lint
        run: make lint
+1
HACKING.md
···
  - `cmd/supercollider`: event stream load generation tool
  - `cmd/sonar`: event stream monitoring tool
  - `cmd/hepa`: auto-moderation rule engine service
+ - `cmd/rainbow`: firehose fanout service
  - `gen`: dev tool to run CBOR type codegen

  Packages:
+4
api/agnostic/doc.go
// Package indigo/api/agnositc provides schema-agnostic helpers for fetching records from the network.
//
// These are variants of endpoints in indigo/api/atproto.
package agnostic
+189
api/agnostic/repoapplyWrites.go
··· 1 + // Copied from indigo:api/atproto/repoapplyWrites.go 2 + 3 + package agnostic 4 + 5 + // schema: com.atproto.repo.applyWrites 6 + 7 + import ( 8 + "context" 9 + "encoding/json" 10 + "fmt" 11 + 12 + "github.com/bluesky-social/indigo/lex/util" 13 + "github.com/bluesky-social/indigo/xrpc" 14 + ) 15 + 16 + // RepoApplyWrites_Create is a "create" in the com.atproto.repo.applyWrites schema. 17 + // 18 + // Operation which creates a new record. 19 + // 20 + // RECORDTYPE: RepoApplyWrites_Create 21 + type RepoApplyWrites_Create struct { 22 + LexiconTypeID string `json:"$type,const=com.atproto.repo.applyWrites#create" cborgen:"$type,const=com.atproto.repo.applyWrites#create"` 23 + Collection string `json:"collection" cborgen:"collection"` 24 + Rkey *string `json:"rkey,omitempty" cborgen:"rkey,omitempty"` 25 + Value *json.RawMessage `json:"value" cborgen:"value"` 26 + } 27 + 28 + // RepoApplyWrites_CreateResult is a "createResult" in the com.atproto.repo.applyWrites schema. 29 + // 30 + // RECORDTYPE: RepoApplyWrites_CreateResult 31 + type RepoApplyWrites_CreateResult struct { 32 + LexiconTypeID string `json:"$type,const=com.atproto.repo.applyWrites#createResult" cborgen:"$type,const=com.atproto.repo.applyWrites#createResult"` 33 + Cid string `json:"cid" cborgen:"cid"` 34 + Uri string `json:"uri" cborgen:"uri"` 35 + ValidationStatus *string `json:"validationStatus,omitempty" cborgen:"validationStatus,omitempty"` 36 + } 37 + 38 + // RepoApplyWrites_Delete is a "delete" in the com.atproto.repo.applyWrites schema. 39 + // 40 + // Operation which deletes an existing record. 41 + // 42 + // RECORDTYPE: RepoApplyWrites_Delete 43 + type RepoApplyWrites_Delete struct { 44 + LexiconTypeID string `json:"$type,const=com.atproto.repo.applyWrites#delete" cborgen:"$type,const=com.atproto.repo.applyWrites#delete"` 45 + Collection string `json:"collection" cborgen:"collection"` 46 + Rkey string `json:"rkey" cborgen:"rkey"` 47 + } 48 + 49 + // RepoApplyWrites_DeleteResult is a "deleteResult" in the com.atproto.repo.applyWrites schema. 50 + // 51 + // RECORDTYPE: RepoApplyWrites_DeleteResult 52 + type RepoApplyWrites_DeleteResult struct { 53 + LexiconTypeID string `json:"$type,const=com.atproto.repo.applyWrites#deleteResult" cborgen:"$type,const=com.atproto.repo.applyWrites#deleteResult"` 54 + } 55 + 56 + // RepoApplyWrites_Input is the input argument to a com.atproto.repo.applyWrites call. 57 + type RepoApplyWrites_Input struct { 58 + // repo: The handle or DID of the repo (aka, current account). 59 + Repo string `json:"repo" cborgen:"repo"` 60 + // swapCommit: If provided, the entire operation will fail if the current repo commit CID does not match this value. Used to prevent conflicting repo mutations. 61 + SwapCommit *string `json:"swapCommit,omitempty" cborgen:"swapCommit,omitempty"` 62 + // validate: Can be set to 'false' to skip Lexicon schema validation of record data across all operations, 'true' to require it, or leave unset to validate only for known Lexicons. 
63 + Validate *bool `json:"validate,omitempty" cborgen:"validate,omitempty"` 64 + Writes []*RepoApplyWrites_Input_Writes_Elem `json:"writes" cborgen:"writes"` 65 + } 66 + 67 + type RepoApplyWrites_Input_Writes_Elem struct { 68 + RepoApplyWrites_Create *RepoApplyWrites_Create 69 + RepoApplyWrites_Update *RepoApplyWrites_Update 70 + RepoApplyWrites_Delete *RepoApplyWrites_Delete 71 + } 72 + 73 + func (t *RepoApplyWrites_Input_Writes_Elem) MarshalJSON() ([]byte, error) { 74 + if t.RepoApplyWrites_Create != nil { 75 + t.RepoApplyWrites_Create.LexiconTypeID = "com.atproto.repo.applyWrites#create" 76 + return json.Marshal(t.RepoApplyWrites_Create) 77 + } 78 + if t.RepoApplyWrites_Update != nil { 79 + t.RepoApplyWrites_Update.LexiconTypeID = "com.atproto.repo.applyWrites#update" 80 + return json.Marshal(t.RepoApplyWrites_Update) 81 + } 82 + if t.RepoApplyWrites_Delete != nil { 83 + t.RepoApplyWrites_Delete.LexiconTypeID = "com.atproto.repo.applyWrites#delete" 84 + return json.Marshal(t.RepoApplyWrites_Delete) 85 + } 86 + return nil, fmt.Errorf("cannot marshal empty enum") 87 + } 88 + func (t *RepoApplyWrites_Input_Writes_Elem) UnmarshalJSON(b []byte) error { 89 + typ, err := util.TypeExtract(b) 90 + if err != nil { 91 + return err 92 + } 93 + 94 + switch typ { 95 + case "com.atproto.repo.applyWrites#create": 96 + t.RepoApplyWrites_Create = new(RepoApplyWrites_Create) 97 + return json.Unmarshal(b, t.RepoApplyWrites_Create) 98 + case "com.atproto.repo.applyWrites#update": 99 + t.RepoApplyWrites_Update = new(RepoApplyWrites_Update) 100 + return json.Unmarshal(b, t.RepoApplyWrites_Update) 101 + case "com.atproto.repo.applyWrites#delete": 102 + t.RepoApplyWrites_Delete = new(RepoApplyWrites_Delete) 103 + return json.Unmarshal(b, t.RepoApplyWrites_Delete) 104 + 105 + default: 106 + return fmt.Errorf("closed enums must have a matching value") 107 + } 108 + } 109 + 110 + // RepoApplyWrites_Output is the output of a com.atproto.repo.applyWrites call. 
111 + type RepoApplyWrites_Output struct { 112 + Commit *RepoDefs_CommitMeta `json:"commit,omitempty" cborgen:"commit,omitempty"` 113 + Results []*RepoApplyWrites_Output_Results_Elem `json:"results,omitempty" cborgen:"results,omitempty"` 114 + } 115 + 116 + type RepoApplyWrites_Output_Results_Elem struct { 117 + RepoApplyWrites_CreateResult *RepoApplyWrites_CreateResult 118 + RepoApplyWrites_UpdateResult *RepoApplyWrites_UpdateResult 119 + RepoApplyWrites_DeleteResult *RepoApplyWrites_DeleteResult 120 + } 121 + 122 + func (t *RepoApplyWrites_Output_Results_Elem) MarshalJSON() ([]byte, error) { 123 + if t.RepoApplyWrites_CreateResult != nil { 124 + t.RepoApplyWrites_CreateResult.LexiconTypeID = "com.atproto.repo.applyWrites#createResult" 125 + return json.Marshal(t.RepoApplyWrites_CreateResult) 126 + } 127 + if t.RepoApplyWrites_UpdateResult != nil { 128 + t.RepoApplyWrites_UpdateResult.LexiconTypeID = "com.atproto.repo.applyWrites#updateResult" 129 + return json.Marshal(t.RepoApplyWrites_UpdateResult) 130 + } 131 + if t.RepoApplyWrites_DeleteResult != nil { 132 + t.RepoApplyWrites_DeleteResult.LexiconTypeID = "com.atproto.repo.applyWrites#deleteResult" 133 + return json.Marshal(t.RepoApplyWrites_DeleteResult) 134 + } 135 + return nil, fmt.Errorf("cannot marshal empty enum") 136 + } 137 + func (t *RepoApplyWrites_Output_Results_Elem) UnmarshalJSON(b []byte) error { 138 + typ, err := util.TypeExtract(b) 139 + if err != nil { 140 + return err 141 + } 142 + 143 + switch typ { 144 + case "com.atproto.repo.applyWrites#createResult": 145 + t.RepoApplyWrites_CreateResult = new(RepoApplyWrites_CreateResult) 146 + return json.Unmarshal(b, t.RepoApplyWrites_CreateResult) 147 + case "com.atproto.repo.applyWrites#updateResult": 148 + t.RepoApplyWrites_UpdateResult = new(RepoApplyWrites_UpdateResult) 149 + return json.Unmarshal(b, t.RepoApplyWrites_UpdateResult) 150 + case "com.atproto.repo.applyWrites#deleteResult": 151 + t.RepoApplyWrites_DeleteResult = new(RepoApplyWrites_DeleteResult) 152 + return json.Unmarshal(b, t.RepoApplyWrites_DeleteResult) 153 + 154 + default: 155 + return fmt.Errorf("closed enums must have a matching value") 156 + } 157 + } 158 + 159 + // RepoApplyWrites_Update is a "update" in the com.atproto.repo.applyWrites schema. 160 + // 161 + // Operation which updates an existing record. 162 + // 163 + // RECORDTYPE: RepoApplyWrites_Update 164 + type RepoApplyWrites_Update struct { 165 + LexiconTypeID string `json:"$type,const=com.atproto.repo.applyWrites#update" cborgen:"$type,const=com.atproto.repo.applyWrites#update"` 166 + Collection string `json:"collection" cborgen:"collection"` 167 + Rkey string `json:"rkey" cborgen:"rkey"` 168 + Value *json.RawMessage `json:"value" cborgen:"value"` 169 + } 170 + 171 + // RepoApplyWrites_UpdateResult is a "updateResult" in the com.atproto.repo.applyWrites schema. 172 + // 173 + // RECORDTYPE: RepoApplyWrites_UpdateResult 174 + type RepoApplyWrites_UpdateResult struct { 175 + LexiconTypeID string `json:"$type,const=com.atproto.repo.applyWrites#updateResult" cborgen:"$type,const=com.atproto.repo.applyWrites#updateResult"` 176 + Cid string `json:"cid" cborgen:"cid"` 177 + Uri string `json:"uri" cborgen:"uri"` 178 + ValidationStatus *string `json:"validationStatus,omitempty" cborgen:"validationStatus,omitempty"` 179 + } 180 + 181 + // RepoApplyWrites calls the XRPC method "com.atproto.repo.applyWrites". 
182 + func RepoApplyWrites(ctx context.Context, c *xrpc.Client, input *RepoApplyWrites_Input) (*RepoApplyWrites_Output, error) { 183 + var out RepoApplyWrites_Output 184 + if err := c.Do(ctx, xrpc.Procedure, "application/json", "com.atproto.repo.applyWrites", nil, input, &out); err != nil { 185 + return nil, err 186 + } 187 + 188 + return &out, nil 189 + }
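For orientation, here is a minimal sketch of how the schema-agnostic applyWrites helper above might be used to create a record from raw JSON. The host, repo DID, and record body are illustrative placeholders, and a real call would need an authenticated xrpc.Client (session setup is omitted here).

package main

import (
	"context"
	"encoding/json"
	"fmt"

	"github.com/bluesky-social/indigo/api/agnostic"
	"github.com/bluesky-social/indigo/xrpc"
)

func main() {
	ctx := context.Background()
	// placeholder host; authentication is assumed to be configured elsewhere
	client := &xrpc.Client{Host: "https://pds.example.com"}

	// raw record body; no collection-specific Go struct is needed
	record := json.RawMessage(`{"$type": "app.bsky.feed.post", "text": "hello", "createdAt": "2024-01-01T00:00:00Z"}`)

	input := &agnostic.RepoApplyWrites_Input{
		Repo: "did:plc:example",
		Writes: []*agnostic.RepoApplyWrites_Input_Writes_Elem{
			{RepoApplyWrites_Create: &agnostic.RepoApplyWrites_Create{
				Collection: "app.bsky.feed.post",
				Value:      &record,
			}},
		},
	}
	out, err := agnostic.RepoApplyWrites(ctx, client, input)
	if err != nil {
		fmt.Println("applyWrites failed:", err)
		return
	}
	for _, res := range out.Results {
		if res.RepoApplyWrites_CreateResult != nil {
			fmt.Println("created:", res.RepoApplyWrites_CreateResult.Uri)
		}
	}
}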
+2
api/atproto/servercreateSession.go
···

  // ServerCreateSession_Input is the input argument to a com.atproto.server.createSession call.
  type ServerCreateSession_Input struct {
+     // allowTakendown: When true, instead of throwing error for takendown accounts, a valid response with a narrow scoped token will be returned
+     AllowTakendown *bool `json:"allowTakendown,omitempty" cborgen:"allowTakendown,omitempty"`
      AuthFactorToken *string `json:"authFactorToken,omitempty" cborgen:"authFactorToken,omitempty"`
      // identifier: Handle or other identifier supported by the server for the authenticating user.
      Identifier string `json:"identifier" cborgen:"identifier"`
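A hedged sketch of how a client might opt into the new field when creating a session. The host, handle, and password are placeholders; ServerCreateSession is the existing generated helper in this package.

package main

import (
	"context"
	"fmt"
	"log"

	comatproto "github.com/bluesky-social/indigo/api/atproto"
	"github.com/bluesky-social/indigo/xrpc"
)

func main() {
	ctx := context.Background()
	client := &xrpc.Client{Host: "https://pds.example.com"} // placeholder host

	allow := true
	sess, err := comatproto.ServerCreateSession(ctx, client, &comatproto.ServerCreateSession_Input{
		Identifier:     "alice.example.com", // placeholder handle
		Password:       "app-password-here", // placeholder credential
		AllowTakendown: &allow,              // new optional field added in this diff
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("session for:", sess.Did)
}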
+30
api/atproto/tempaddReservedHandle.go
// Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT.

package atproto

// schema: com.atproto.temp.addReservedHandle

import (
	"context"

	"github.com/bluesky-social/indigo/xrpc"
)

// TempAddReservedHandle_Input is the input argument to a com.atproto.temp.addReservedHandle call.
type TempAddReservedHandle_Input struct {
	Handle string `json:"handle" cborgen:"handle"`
}

// TempAddReservedHandle_Output is the output of a com.atproto.temp.addReservedHandle call.
type TempAddReservedHandle_Output struct {
}

// TempAddReservedHandle calls the XRPC method "com.atproto.temp.addReservedHandle".
func TempAddReservedHandle(ctx context.Context, c *xrpc.Client, input *TempAddReservedHandle_Input) (*TempAddReservedHandle_Output, error) {
	var out TempAddReservedHandle_Output
	if err := c.Do(ctx, xrpc.Procedure, "application/json", "com.atproto.temp.addReservedHandle", nil, input, &out); err != nil {
		return nil, err
	}

	return &out, nil
}
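A brief sketch of calling this new endpoint. The host is a placeholder, and the endpoint presumably requires privileged (admin) authentication on the client, which is omitted here as an assumption.

package main

import (
	"context"
	"log"

	comatproto "github.com/bluesky-social/indigo/api/atproto"
	"github.com/bluesky-social/indigo/xrpc"
)

func main() {
	ctx := context.Background()
	// placeholder host; appropriate auth is assumed to be configured on the client
	client := &xrpc.Client{Host: "https://pds.example.com"}

	_, err := comatproto.TempAddReservedHandle(ctx, client, &comatproto.TempAddReservedHandle_Input{
		Handle: "reserved-name.example.com", // placeholder handle to reserve
	})
	if err != nil {
		log.Fatal(err)
	}
	log.Println("handle reserved")
}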
+2
api/bsky/actorgetSuggestions.go
···
  type ActorGetSuggestions_Output struct {
      Actors []*ActorDefs_ProfileView `json:"actors" cborgen:"actors"`
      Cursor *string `json:"cursor,omitempty" cborgen:"cursor,omitempty"`
+     // recId: Snowflake for this recommendation, use when submitting recommendation events.
+     RecId *int64 `json:"recId,omitempty" cborgen:"recId,omitempty"`
  }

  // ActorGetSuggestions calls the XRPC method "app.bsky.actor.getSuggestions".
+3 -1
api/bsky/graphgetSuggestedFollowsByActor.go
···
  // GraphGetSuggestedFollowsByActor_Output is the output of a app.bsky.graph.getSuggestedFollowsByActor call.
  type GraphGetSuggestedFollowsByActor_Output struct {
      // isFallback: If true, response has fallen-back to generic results, and is not scoped using relativeToDid
-     IsFallback *bool `json:"isFallback,omitempty" cborgen:"isFallback,omitempty"`
+     IsFallback *bool `json:"isFallback,omitempty" cborgen:"isFallback,omitempty"`
+     // recId: Snowflake for this recommendation, use when submitting recommendation events.
+     RecId *int64 `json:"recId,omitempty" cborgen:"recId,omitempty"`
      Suggestions []*ActorDefs_ProfileView `json:"suggestions" cborgen:"suggestions"`
  }
+35
api/bsky/graphsearchStarterPacks.go
··· 1 + // Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT. 2 + 3 + package bsky 4 + 5 + // schema: app.bsky.graph.searchStarterPacks 6 + 7 + import ( 8 + "context" 9 + 10 + "github.com/bluesky-social/indigo/xrpc" 11 + ) 12 + 13 + // GraphSearchStarterPacks_Output is the output of a app.bsky.graph.searchStarterPacks call. 14 + type GraphSearchStarterPacks_Output struct { 15 + Cursor *string `json:"cursor,omitempty" cborgen:"cursor,omitempty"` 16 + StarterPacks []*GraphDefs_StarterPackViewBasic `json:"starterPacks" cborgen:"starterPacks"` 17 + } 18 + 19 + // GraphSearchStarterPacks calls the XRPC method "app.bsky.graph.searchStarterPacks". 20 + // 21 + // q: Search query string. Syntax, phrase, boolean, and faceting is unspecified, but Lucene query syntax is recommended. 22 + func GraphSearchStarterPacks(ctx context.Context, c *xrpc.Client, cursor string, limit int64, q string) (*GraphSearchStarterPacks_Output, error) { 23 + var out GraphSearchStarterPacks_Output 24 + 25 + params := map[string]interface{}{ 26 + "cursor": cursor, 27 + "limit": limit, 28 + "q": q, 29 + } 30 + if err := c.Do(ctx, xrpc.Query, "", "app.bsky.graph.searchStarterPacks", params, nil, &out); err != nil { 31 + return nil, err 32 + } 33 + 34 + return &out, nil 35 + }
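A hedged usage sketch for this new query endpoint. The AppView host is an assumption (whether it accepts unauthenticated queries is not specified by the diff), and the query string is illustrative.

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/bluesky-social/indigo/api/bsky"
	"github.com/bluesky-social/indigo/xrpc"
)

func main() {
	ctx := context.Background()
	// assumed public AppView host; may require auth depending on deployment
	client := &xrpc.Client{Host: "https://public.api.bsky.app"}

	out, err := bsky.GraphSearchStarterPacks(ctx, client, "", 25, "golang")
	if err != nil {
		log.Fatal(err)
	}
	for _, sp := range out.StarterPacks {
		fmt.Println(sp.Uri)
	}
}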
+4 -1
api/bsky/notificationlistNotifications.go
···
  }

  // NotificationListNotifications calls the XRPC method "app.bsky.notification.listNotifications".
- func NotificationListNotifications(ctx context.Context, c *xrpc.Client, cursor string, limit int64, priority bool, seenAt string) (*NotificationListNotifications_Output, error) {
+ //
+ // reasons: Notification reasons to include in response.
+ func NotificationListNotifications(ctx context.Context, c *xrpc.Client, cursor string, limit int64, priority bool, reasons []string, seenAt string) (*NotificationListNotifications_Output, error) {
      var out NotificationListNotifications_Output

      params := map[string]interface{}{
          "cursor":   cursor,
          "limit":    limit,
          "priority": priority,
+         "reasons":  reasons,
          "seenAt":   seenAt,
      }
      if err := c.Do(ctx, xrpc.Query, "", "app.bsky.notification.listNotifications", params, nil, &out); err != nil {
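A sketch of the updated signature with the new reasons filter. The host is a placeholder, the endpoint needs an authenticated session (omitted), and the specific reason strings ("like", "reply") are an assumption about accepted filter values rather than something stated in this diff.

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/bluesky-social/indigo/api/bsky"
	"github.com/bluesky-social/indigo/xrpc"
)

func main() {
	ctx := context.Background()
	// placeholder host; authenticated session assumed
	client := &xrpc.Client{Host: "https://pds.example.com"}

	out, err := bsky.NotificationListNotifications(ctx, client, "", 50, false, []string{"like", "reply"}, "")
	if err != nil {
		log.Fatal(err)
	}
	for _, n := range out.Notifications {
		fmt.Println(n.Reason, n.Uri)
	}
}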
+13
api/bsky/unspecceddefs.go
···
  type UnspeccedDefs_SkeletonSearchPost struct {
      Uri string `json:"uri" cborgen:"uri"`
  }
+
+ // UnspeccedDefs_SkeletonSearchStarterPack is a "skeletonSearchStarterPack" in the app.bsky.unspecced.defs schema.
+ type UnspeccedDefs_SkeletonSearchStarterPack struct {
+     Uri string `json:"uri" cborgen:"uri"`
+ }
+
+ // UnspeccedDefs_TrendingTopic is a "trendingTopic" in the app.bsky.unspecced.defs schema.
+ type UnspeccedDefs_TrendingTopic struct {
+     Description *string `json:"description,omitempty" cborgen:"description,omitempty"`
+     DisplayName *string `json:"displayName,omitempty" cborgen:"displayName,omitempty"`
+     Link string `json:"link" cborgen:"link"`
+     Topic string `json:"topic" cborgen:"topic"`
+ }
+2
api/bsky/unspeccedgetSuggestionsSkeleton.go
···
  type UnspeccedGetSuggestionsSkeleton_Output struct {
      Actors []*UnspeccedDefs_SkeletonSearchActor `json:"actors" cborgen:"actors"`
      Cursor *string `json:"cursor,omitempty" cborgen:"cursor,omitempty"`
+     // recId: Snowflake for this recommendation, use when submitting recommendation events.
+     RecId *int64 `json:"recId,omitempty" cborgen:"recId,omitempty"`
      // relativeToDid: DID of the account these suggestions are relative to. If this is returned undefined, suggestions are based on the viewer.
      RelativeToDid *string `json:"relativeToDid,omitempty" cborgen:"relativeToDid,omitempty"`
  }
+34
api/bsky/unspeccedgetTrendingTopics.go
··· 1 + // Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT. 2 + 3 + package bsky 4 + 5 + // schema: app.bsky.unspecced.getTrendingTopics 6 + 7 + import ( 8 + "context" 9 + 10 + "github.com/bluesky-social/indigo/xrpc" 11 + ) 12 + 13 + // UnspeccedGetTrendingTopics_Output is the output of a app.bsky.unspecced.getTrendingTopics call. 14 + type UnspeccedGetTrendingTopics_Output struct { 15 + Suggested []*UnspeccedDefs_TrendingTopic `json:"suggested" cborgen:"suggested"` 16 + Topics []*UnspeccedDefs_TrendingTopic `json:"topics" cborgen:"topics"` 17 + } 18 + 19 + // UnspeccedGetTrendingTopics calls the XRPC method "app.bsky.unspecced.getTrendingTopics". 20 + // 21 + // viewer: DID of the account making the request (not included for public/unauthenticated queries). Used to boost followed accounts in ranking. 22 + func UnspeccedGetTrendingTopics(ctx context.Context, c *xrpc.Client, limit int64, viewer string) (*UnspeccedGetTrendingTopics_Output, error) { 23 + var out UnspeccedGetTrendingTopics_Output 24 + 25 + params := map[string]interface{}{ 26 + "limit": limit, 27 + "viewer": viewer, 28 + } 29 + if err := c.Do(ctx, xrpc.Query, "", "app.bsky.unspecced.getTrendingTopics", params, nil, &out); err != nil { 30 + return nil, err 31 + } 32 + 33 + return &out, nil 34 + }
+40
api/bsky/unspeccedsearchStarterPacksSkeleton.go
··· 1 + // Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT. 2 + 3 + package bsky 4 + 5 + // schema: app.bsky.unspecced.searchStarterPacksSkeleton 6 + 7 + import ( 8 + "context" 9 + 10 + "github.com/bluesky-social/indigo/xrpc" 11 + ) 12 + 13 + // UnspeccedSearchStarterPacksSkeleton_Output is the output of a app.bsky.unspecced.searchStarterPacksSkeleton call. 14 + type UnspeccedSearchStarterPacksSkeleton_Output struct { 15 + Cursor *string `json:"cursor,omitempty" cborgen:"cursor,omitempty"` 16 + // hitsTotal: Count of search hits. Optional, may be rounded/truncated, and may not be possible to paginate through all hits. 17 + HitsTotal *int64 `json:"hitsTotal,omitempty" cborgen:"hitsTotal,omitempty"` 18 + StarterPacks []*UnspeccedDefs_SkeletonSearchStarterPack `json:"starterPacks" cborgen:"starterPacks"` 19 + } 20 + 21 + // UnspeccedSearchStarterPacksSkeleton calls the XRPC method "app.bsky.unspecced.searchStarterPacksSkeleton". 22 + // 23 + // cursor: Optional pagination mechanism; may not necessarily allow scrolling through entire result set. 24 + // q: Search query string; syntax, phrase, boolean, and faceting is unspecified, but Lucene query syntax is recommended. 25 + // viewer: DID of the account making the request (not included for public/unauthenticated queries). 26 + func UnspeccedSearchStarterPacksSkeleton(ctx context.Context, c *xrpc.Client, cursor string, limit int64, q string, viewer string) (*UnspeccedSearchStarterPacksSkeleton_Output, error) { 27 + var out UnspeccedSearchStarterPacksSkeleton_Output 28 + 29 + params := map[string]interface{}{ 30 + "cursor": cursor, 31 + "limit": limit, 32 + "q": q, 33 + "viewer": viewer, 34 + } 35 + if err := c.Do(ctx, xrpc.Query, "", "app.bsky.unspecced.searchStarterPacksSkeleton", params, nil, &out); err != nil { 36 + return nil, err 37 + } 38 + 39 + return &out, nil 40 + }
+1
api/chat/convodefs.go
···
      LastMessage *ConvoDefs_ConvoView_LastMessage `json:"lastMessage,omitempty" cborgen:"lastMessage,omitempty"`
      Members []*ActorDefs_ProfileViewBasic `json:"members" cborgen:"members"`
      Muted bool `json:"muted" cborgen:"muted"`
+     Opened *bool `json:"opened,omitempty" cborgen:"opened,omitempty"`
      Rev string `json:"rev" cborgen:"rev"`
      UnreadCount int64 `json:"unreadCount" cborgen:"unreadCount"`
  }
+159 -6
api/ozone/moderationdefs.go
··· 13 13 "github.com/bluesky-social/indigo/lex/util" 14 14 ) 15 15 16 + // ModerationDefs_AccountEvent is a "accountEvent" in the tools.ozone.moderation.defs schema. 17 + // 18 + // Logs account status related events on a repo subject. Normally captured by automod from the firehose and emitted to ozone for historical tracking. 19 + // 20 + // RECORDTYPE: ModerationDefs_AccountEvent 21 + type ModerationDefs_AccountEvent struct { 22 + LexiconTypeID string `json:"$type,const=tools.ozone.moderation.defs#accountEvent" cborgen:"$type,const=tools.ozone.moderation.defs#accountEvent"` 23 + // active: Indicates that the account has a repository which can be fetched from the host that emitted this event. 24 + Active bool `json:"active" cborgen:"active"` 25 + Comment *string `json:"comment,omitempty" cborgen:"comment,omitempty"` 26 + Status *string `json:"status,omitempty" cborgen:"status,omitempty"` 27 + Timestamp string `json:"timestamp" cborgen:"timestamp"` 28 + } 29 + 30 + // ModerationDefs_AccountHosting is a "accountHosting" in the tools.ozone.moderation.defs schema. 31 + // 32 + // RECORDTYPE: ModerationDefs_AccountHosting 33 + type ModerationDefs_AccountHosting struct { 34 + LexiconTypeID string `json:"$type,const=tools.ozone.moderation.defs#accountHosting" cborgen:"$type,const=tools.ozone.moderation.defs#accountHosting"` 35 + CreatedAt *string `json:"createdAt,omitempty" cborgen:"createdAt,omitempty"` 36 + DeactivatedAt *string `json:"deactivatedAt,omitempty" cborgen:"deactivatedAt,omitempty"` 37 + DeletedAt *string `json:"deletedAt,omitempty" cborgen:"deletedAt,omitempty"` 38 + ReactivatedAt *string `json:"reactivatedAt,omitempty" cborgen:"reactivatedAt,omitempty"` 39 + Status string `json:"status" cborgen:"status"` 40 + UpdatedAt *string `json:"updatedAt,omitempty" cborgen:"updatedAt,omitempty"` 41 + } 42 + 16 43 // ModerationDefs_BlobView is a "blobView" in the tools.ozone.moderation.defs schema. 17 44 type ModerationDefs_BlobView struct { 18 45 Cid string `json:"cid" cborgen:"cid"` ··· 58 85 } 59 86 } 60 87 88 + // ModerationDefs_IdentityEvent is a "identityEvent" in the tools.ozone.moderation.defs schema. 89 + // 90 + // Logs identity related events on a repo subject. Normally captured by automod from the firehose and emitted to ozone for historical tracking. 91 + // 92 + // RECORDTYPE: ModerationDefs_IdentityEvent 93 + type ModerationDefs_IdentityEvent struct { 94 + LexiconTypeID string `json:"$type,const=tools.ozone.moderation.defs#identityEvent" cborgen:"$type,const=tools.ozone.moderation.defs#identityEvent"` 95 + Comment *string `json:"comment,omitempty" cborgen:"comment,omitempty"` 96 + Handle *string `json:"handle,omitempty" cborgen:"handle,omitempty"` 97 + PdsHost *string `json:"pdsHost,omitempty" cborgen:"pdsHost,omitempty"` 98 + Timestamp string `json:"timestamp" cborgen:"timestamp"` 99 + Tombstone *bool `json:"tombstone,omitempty" cborgen:"tombstone,omitempty"` 100 + } 101 + 61 102 // ModerationDefs_ImageDetails is a "imageDetails" in the tools.ozone.moderation.defs schema. 
62 103 // 63 104 // RECORDTYPE: ModerationDefs_ImageDetails ··· 71 112 // 72 113 // RECORDTYPE: ModerationDefs_ModEventAcknowledge 73 114 type ModerationDefs_ModEventAcknowledge struct { 74 - LexiconTypeID string `json:"$type,const=tools.ozone.moderation.defs#modEventAcknowledge" cborgen:"$type,const=tools.ozone.moderation.defs#modEventAcknowledge"` 75 - Comment *string `json:"comment,omitempty" cborgen:"comment,omitempty"` 115 + LexiconTypeID string `json:"$type,const=tools.ozone.moderation.defs#modEventAcknowledge" cborgen:"$type,const=tools.ozone.moderation.defs#modEventAcknowledge"` 116 + // acknowledgeAccountSubjects: If true, all other reports on content authored by this account will be resolved (acknowledged). 117 + AcknowledgeAccountSubjects *bool `json:"acknowledgeAccountSubjects,omitempty" cborgen:"acknowledgeAccountSubjects,omitempty"` 118 + Comment *string `json:"comment,omitempty" cborgen:"comment,omitempty"` 76 119 } 77 120 78 121 // ModerationDefs_ModEventComment is a "modEventComment" in the tools.ozone.moderation.defs schema. ··· 152 195 type ModerationDefs_ModEventMuteReporter struct { 153 196 LexiconTypeID string `json:"$type,const=tools.ozone.moderation.defs#modEventMuteReporter" cborgen:"$type,const=tools.ozone.moderation.defs#modEventMuteReporter"` 154 197 Comment *string `json:"comment,omitempty" cborgen:"comment,omitempty"` 155 - // durationInHours: Indicates how long the account should remain muted. 156 - DurationInHours int64 `json:"durationInHours" cborgen:"durationInHours"` 198 + // durationInHours: Indicates how long the account should remain muted. Falsy value here means a permanent mute. 199 + DurationInHours *int64 `json:"durationInHours,omitempty" cborgen:"durationInHours,omitempty"` 157 200 } 158 201 159 202 // ModerationDefs_ModEventReport is a "modEventReport" in the tools.ozone.moderation.defs schema. ··· 218 261 Comment *string `json:"comment,omitempty" cborgen:"comment,omitempty"` 219 262 // durationInHours: Indicates how long the takedown should be in effect before automatically expiring. 220 263 DurationInHours *int64 `json:"durationInHours,omitempty" cborgen:"durationInHours,omitempty"` 264 + // policies: Names/Keywords of the policies that drove the decision. 265 + Policies []string `json:"policies,omitempty" cborgen:"policies,omitempty"` 221 266 } 222 267 223 268 // ModerationDefs_ModEventUnmute is a "modEventUnmute" in the tools.ozone.moderation.defs schema. 
··· 280 325 ModerationDefs_ModEventResolveAppeal *ModerationDefs_ModEventResolveAppeal 281 326 ModerationDefs_ModEventDivert *ModerationDefs_ModEventDivert 282 327 ModerationDefs_ModEventTag *ModerationDefs_ModEventTag 328 + ModerationDefs_AccountEvent *ModerationDefs_AccountEvent 329 + ModerationDefs_IdentityEvent *ModerationDefs_IdentityEvent 330 + ModerationDefs_RecordEvent *ModerationDefs_RecordEvent 283 331 } 284 332 285 333 func (t *ModerationDefs_ModEventViewDetail_Event) MarshalJSON() ([]byte, error) { ··· 343 391 t.ModerationDefs_ModEventTag.LexiconTypeID = "tools.ozone.moderation.defs#modEventTag" 344 392 return json.Marshal(t.ModerationDefs_ModEventTag) 345 393 } 394 + if t.ModerationDefs_AccountEvent != nil { 395 + t.ModerationDefs_AccountEvent.LexiconTypeID = "tools.ozone.moderation.defs#accountEvent" 396 + return json.Marshal(t.ModerationDefs_AccountEvent) 397 + } 398 + if t.ModerationDefs_IdentityEvent != nil { 399 + t.ModerationDefs_IdentityEvent.LexiconTypeID = "tools.ozone.moderation.defs#identityEvent" 400 + return json.Marshal(t.ModerationDefs_IdentityEvent) 401 + } 402 + if t.ModerationDefs_RecordEvent != nil { 403 + t.ModerationDefs_RecordEvent.LexiconTypeID = "tools.ozone.moderation.defs#recordEvent" 404 + return json.Marshal(t.ModerationDefs_RecordEvent) 405 + } 346 406 return nil, fmt.Errorf("cannot marshal empty enum") 347 407 } 348 408 func (t *ModerationDefs_ModEventViewDetail_Event) UnmarshalJSON(b []byte) error { ··· 397 457 case "tools.ozone.moderation.defs#modEventTag": 398 458 t.ModerationDefs_ModEventTag = new(ModerationDefs_ModEventTag) 399 459 return json.Unmarshal(b, t.ModerationDefs_ModEventTag) 460 + case "tools.ozone.moderation.defs#accountEvent": 461 + t.ModerationDefs_AccountEvent = new(ModerationDefs_AccountEvent) 462 + return json.Unmarshal(b, t.ModerationDefs_AccountEvent) 463 + case "tools.ozone.moderation.defs#identityEvent": 464 + t.ModerationDefs_IdentityEvent = new(ModerationDefs_IdentityEvent) 465 + return json.Unmarshal(b, t.ModerationDefs_IdentityEvent) 466 + case "tools.ozone.moderation.defs#recordEvent": 467 + t.ModerationDefs_RecordEvent = new(ModerationDefs_RecordEvent) 468 + return json.Unmarshal(b, t.ModerationDefs_RecordEvent) 400 469 401 470 default: 402 471 return nil ··· 470 539 ModerationDefs_ModEventResolveAppeal *ModerationDefs_ModEventResolveAppeal 471 540 ModerationDefs_ModEventDivert *ModerationDefs_ModEventDivert 472 541 ModerationDefs_ModEventTag *ModerationDefs_ModEventTag 542 + ModerationDefs_AccountEvent *ModerationDefs_AccountEvent 543 + ModerationDefs_IdentityEvent *ModerationDefs_IdentityEvent 544 + ModerationDefs_RecordEvent *ModerationDefs_RecordEvent 473 545 } 474 546 475 547 func (t *ModerationDefs_ModEventView_Event) MarshalJSON() ([]byte, error) { ··· 533 605 t.ModerationDefs_ModEventTag.LexiconTypeID = "tools.ozone.moderation.defs#modEventTag" 534 606 return json.Marshal(t.ModerationDefs_ModEventTag) 535 607 } 608 + if t.ModerationDefs_AccountEvent != nil { 609 + t.ModerationDefs_AccountEvent.LexiconTypeID = "tools.ozone.moderation.defs#accountEvent" 610 + return json.Marshal(t.ModerationDefs_AccountEvent) 611 + } 612 + if t.ModerationDefs_IdentityEvent != nil { 613 + t.ModerationDefs_IdentityEvent.LexiconTypeID = "tools.ozone.moderation.defs#identityEvent" 614 + return json.Marshal(t.ModerationDefs_IdentityEvent) 615 + } 616 + if t.ModerationDefs_RecordEvent != nil { 617 + t.ModerationDefs_RecordEvent.LexiconTypeID = "tools.ozone.moderation.defs#recordEvent" 618 + return 
json.Marshal(t.ModerationDefs_RecordEvent) 619 + } 536 620 return nil, fmt.Errorf("cannot marshal empty enum") 537 621 } 538 622 func (t *ModerationDefs_ModEventView_Event) UnmarshalJSON(b []byte) error { ··· 587 671 case "tools.ozone.moderation.defs#modEventTag": 588 672 t.ModerationDefs_ModEventTag = new(ModerationDefs_ModEventTag) 589 673 return json.Unmarshal(b, t.ModerationDefs_ModEventTag) 674 + case "tools.ozone.moderation.defs#accountEvent": 675 + t.ModerationDefs_AccountEvent = new(ModerationDefs_AccountEvent) 676 + return json.Unmarshal(b, t.ModerationDefs_AccountEvent) 677 + case "tools.ozone.moderation.defs#identityEvent": 678 + t.ModerationDefs_IdentityEvent = new(ModerationDefs_IdentityEvent) 679 + return json.Unmarshal(b, t.ModerationDefs_IdentityEvent) 680 + case "tools.ozone.moderation.defs#recordEvent": 681 + t.ModerationDefs_RecordEvent = new(ModerationDefs_RecordEvent) 682 + return json.Unmarshal(b, t.ModerationDefs_RecordEvent) 590 683 591 684 default: 592 685 return nil ··· 646 739 SubjectStatus *ModerationDefs_SubjectStatusView `json:"subjectStatus,omitempty" cborgen:"subjectStatus,omitempty"` 647 740 } 648 741 742 + // ModerationDefs_RecordEvent is a "recordEvent" in the tools.ozone.moderation.defs schema. 743 + // 744 + // Logs lifecycle event on a record subject. Normally captured by automod from the firehose and emitted to ozone for historical tracking. 745 + // 746 + // RECORDTYPE: ModerationDefs_RecordEvent 747 + type ModerationDefs_RecordEvent struct { 748 + LexiconTypeID string `json:"$type,const=tools.ozone.moderation.defs#recordEvent" cborgen:"$type,const=tools.ozone.moderation.defs#recordEvent"` 749 + Cid *string `json:"cid,omitempty" cborgen:"cid,omitempty"` 750 + Comment *string `json:"comment,omitempty" cborgen:"comment,omitempty"` 751 + Op string `json:"op" cborgen:"op"` 752 + Timestamp string `json:"timestamp" cborgen:"timestamp"` 753 + } 754 + 755 + // ModerationDefs_RecordHosting is a "recordHosting" in the tools.ozone.moderation.defs schema. 756 + // 757 + // RECORDTYPE: ModerationDefs_RecordHosting 758 + type ModerationDefs_RecordHosting struct { 759 + LexiconTypeID string `json:"$type,const=tools.ozone.moderation.defs#recordHosting" cborgen:"$type,const=tools.ozone.moderation.defs#recordHosting"` 760 + CreatedAt *string `json:"createdAt,omitempty" cborgen:"createdAt,omitempty"` 761 + DeletedAt *string `json:"deletedAt,omitempty" cborgen:"deletedAt,omitempty"` 762 + Status string `json:"status" cborgen:"status"` 763 + UpdatedAt *string `json:"updatedAt,omitempty" cborgen:"updatedAt,omitempty"` 764 + } 765 + 649 766 // ModerationDefs_RecordView is a "recordView" in the tools.ozone.moderation.defs schema. 650 767 // 651 768 // RECORDTYPE: ModerationDefs_RecordView ··· 737 854 // comment: Sticky comment on the subject. 
738 855 Comment *string `json:"comment,omitempty" cborgen:"comment,omitempty"` 739 856 // createdAt: Timestamp referencing the first moderation status impacting event was emitted on the subject 740 - CreatedAt string `json:"createdAt" cborgen:"createdAt"` 741 - Id int64 `json:"id" cborgen:"id"` 857 + CreatedAt string `json:"createdAt" cborgen:"createdAt"` 858 + Hosting *ModerationDefs_SubjectStatusView_Hosting `json:"hosting,omitempty" cborgen:"hosting,omitempty"` 859 + Id int64 `json:"id" cborgen:"id"` 742 860 // lastAppealedAt: Timestamp referencing when the author of the subject appealed a moderation action 743 861 LastAppealedAt *string `json:"lastAppealedAt,omitempty" cborgen:"lastAppealedAt,omitempty"` 744 862 LastReportedAt *string `json:"lastReportedAt,omitempty" cborgen:"lastReportedAt,omitempty"` ··· 755 873 Takendown *bool `json:"takendown,omitempty" cborgen:"takendown,omitempty"` 756 874 // updatedAt: Timestamp referencing when the last update was made to the moderation status of the subject 757 875 UpdatedAt string `json:"updatedAt" cborgen:"updatedAt"` 876 + } 877 + 878 + type ModerationDefs_SubjectStatusView_Hosting struct { 879 + ModerationDefs_AccountHosting *ModerationDefs_AccountHosting 880 + ModerationDefs_RecordHosting *ModerationDefs_RecordHosting 881 + } 882 + 883 + func (t *ModerationDefs_SubjectStatusView_Hosting) MarshalJSON() ([]byte, error) { 884 + if t.ModerationDefs_AccountHosting != nil { 885 + t.ModerationDefs_AccountHosting.LexiconTypeID = "tools.ozone.moderation.defs#accountHosting" 886 + return json.Marshal(t.ModerationDefs_AccountHosting) 887 + } 888 + if t.ModerationDefs_RecordHosting != nil { 889 + t.ModerationDefs_RecordHosting.LexiconTypeID = "tools.ozone.moderation.defs#recordHosting" 890 + return json.Marshal(t.ModerationDefs_RecordHosting) 891 + } 892 + return nil, fmt.Errorf("cannot marshal empty enum") 893 + } 894 + func (t *ModerationDefs_SubjectStatusView_Hosting) UnmarshalJSON(b []byte) error { 895 + typ, err := util.TypeExtract(b) 896 + if err != nil { 897 + return err 898 + } 899 + 900 + switch typ { 901 + case "tools.ozone.moderation.defs#accountHosting": 902 + t.ModerationDefs_AccountHosting = new(ModerationDefs_AccountHosting) 903 + return json.Unmarshal(b, t.ModerationDefs_AccountHosting) 904 + case "tools.ozone.moderation.defs#recordHosting": 905 + t.ModerationDefs_RecordHosting = new(ModerationDefs_RecordHosting) 906 + return json.Unmarshal(b, t.ModerationDefs_RecordHosting) 907 + 908 + default: 909 + return nil 910 + } 758 911 } 759 912 760 913 type ModerationDefs_SubjectStatusView_Subject struct {
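Since the new hosting field on subjectStatusView is a closed union, here is a small sketch of how the generated wrapper above round-trips through JSON, dispatching on $type. The "takendown" status string is illustrative, not a value confirmed by this diff.

package main

import (
	"encoding/json"
	"fmt"
	"log"

	"github.com/bluesky-social/indigo/api/ozone"
)

func main() {
	// wrap an accountHosting value in the new union type
	hosting := &ozone.ModerationDefs_SubjectStatusView_Hosting{
		ModerationDefs_AccountHosting: &ozone.ModerationDefs_AccountHosting{
			Status: "takendown", // illustrative status value
		},
	}

	b, err := json.Marshal(hosting)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(string(b)) // $type is set to tools.ozone.moderation.defs#accountHosting by MarshalJSON

	// unmarshalling dispatches on $type back into the matching branch
	var out ozone.ModerationDefs_SubjectStatusView_Hosting
	if err := json.Unmarshal(b, &out); err != nil {
		log.Fatal(err)
	}
	fmt.Println(out.ModerationDefs_AccountHosting.Status)
}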
+24
api/ozone/moderationemitEvent.go
··· 37 37 ModerationDefs_ModEventResolveAppeal *ModerationDefs_ModEventResolveAppeal 38 38 ModerationDefs_ModEventEmail *ModerationDefs_ModEventEmail 39 39 ModerationDefs_ModEventTag *ModerationDefs_ModEventTag 40 + ModerationDefs_AccountEvent *ModerationDefs_AccountEvent 41 + ModerationDefs_IdentityEvent *ModerationDefs_IdentityEvent 42 + ModerationDefs_RecordEvent *ModerationDefs_RecordEvent 40 43 } 41 44 42 45 func (t *ModerationEmitEvent_Input_Event) MarshalJSON() ([]byte, error) { ··· 96 99 t.ModerationDefs_ModEventTag.LexiconTypeID = "tools.ozone.moderation.defs#modEventTag" 97 100 return json.Marshal(t.ModerationDefs_ModEventTag) 98 101 } 102 + if t.ModerationDefs_AccountEvent != nil { 103 + t.ModerationDefs_AccountEvent.LexiconTypeID = "tools.ozone.moderation.defs#accountEvent" 104 + return json.Marshal(t.ModerationDefs_AccountEvent) 105 + } 106 + if t.ModerationDefs_IdentityEvent != nil { 107 + t.ModerationDefs_IdentityEvent.LexiconTypeID = "tools.ozone.moderation.defs#identityEvent" 108 + return json.Marshal(t.ModerationDefs_IdentityEvent) 109 + } 110 + if t.ModerationDefs_RecordEvent != nil { 111 + t.ModerationDefs_RecordEvent.LexiconTypeID = "tools.ozone.moderation.defs#recordEvent" 112 + return json.Marshal(t.ModerationDefs_RecordEvent) 113 + } 99 114 return nil, fmt.Errorf("cannot marshal empty enum") 100 115 } 101 116 func (t *ModerationEmitEvent_Input_Event) UnmarshalJSON(b []byte) error { ··· 147 162 case "tools.ozone.moderation.defs#modEventTag": 148 163 t.ModerationDefs_ModEventTag = new(ModerationDefs_ModEventTag) 149 164 return json.Unmarshal(b, t.ModerationDefs_ModEventTag) 165 + case "tools.ozone.moderation.defs#accountEvent": 166 + t.ModerationDefs_AccountEvent = new(ModerationDefs_AccountEvent) 167 + return json.Unmarshal(b, t.ModerationDefs_AccountEvent) 168 + case "tools.ozone.moderation.defs#identityEvent": 169 + t.ModerationDefs_IdentityEvent = new(ModerationDefs_IdentityEvent) 170 + return json.Unmarshal(b, t.ModerationDefs_IdentityEvent) 171 + case "tools.ozone.moderation.defs#recordEvent": 172 + t.ModerationDefs_RecordEvent = new(ModerationDefs_RecordEvent) 173 + return json.Unmarshal(b, t.ModerationDefs_RecordEvent) 150 174 151 175 default: 152 176 return nil
+3 -2
api/ozone/moderationqueryEvents.go
···
  // addedLabels: If specified, only events where all of these labels were added are returned
  // addedTags: If specified, only events where all of these tags were added are returned
  // collections: If specified, only events where the subject belongs to the given collections will be returned. When subjectType is set to 'account', this will be ignored.
- // comment: If specified, only events with comments containing the keyword are returned
+ // comment: If specified, only events with comments containing the keyword are returned. Apply || separator to use multiple keywords and match using OR condition.
  // createdAfter: Retrieve events created after a given timestamp
  // createdBefore: Retrieve events created before a given timestamp
  // hasComment: If true, only events with comments are returned
···
  // sortDirection: Sort direction for the events. Defaults to descending order of created at timestamp.
  // subjectType: If specified, only events where the subject is of the given type (account or record) will be returned. When this is set to 'account' the 'collections' parameter will be ignored. When includeAllUserRecords or subject is set, this will be ignored.
  // types: The types of events (fully qualified string in the format of tools.ozone.moderation.defs#modEvent<name>) to filter by. If not specified, all events are returned.
- func ModerationQueryEvents(ctx context.Context, c *xrpc.Client, addedLabels []string, addedTags []string, collections []string, comment string, createdAfter string, createdBefore string, createdBy string, cursor string, hasComment bool, includeAllUserRecords bool, limit int64, removedLabels []string, removedTags []string, reportTypes []string, sortDirection string, subject string, subjectType string, types []string) (*ModerationQueryEvents_Output, error) {
+ func ModerationQueryEvents(ctx context.Context, c *xrpc.Client, addedLabels []string, addedTags []string, collections []string, comment string, createdAfter string, createdBefore string, createdBy string, cursor string, hasComment bool, includeAllUserRecords bool, limit int64, policies []string, removedLabels []string, removedTags []string, reportTypes []string, sortDirection string, subject string, subjectType string, types []string) (*ModerationQueryEvents_Output, error) {
      var out ModerationQueryEvents_Output

      params := map[string]interface{}{
···
          "hasComment":            hasComment,
          "includeAllUserRecords": includeAllUserRecords,
          "limit":                 limit,
+         "policies":              policies,
          "removedLabels":         removedLabels,
          "removedTags":           removedTags,
          "reportTypes":           reportTypes,
+17 -1
api/ozone/moderationqueryStatuses.go
···
  // appealed: Get subjects in unresolved appealed status
  // collections: If specified, subjects belonging to the given collections will be returned. When subjectType is set to 'account', this will be ignored.
  // comment: Search subjects by keyword from comments
+ // hostingDeletedAfter: Search subjects where the associated record/account was deleted after a given timestamp
+ // hostingDeletedBefore: Search subjects where the associated record/account was deleted before a given timestamp
+ // hostingStatuses: Search subjects by the status of the associated record/account
+ // hostingUpdatedAfter: Search subjects where the associated record/account was updated after a given timestamp
+ // hostingUpdatedBefore: Search subjects where the associated record/account was updated before a given timestamp
  // includeAllUserRecords: All subjects, or subjects from given 'collections' param, belonging to the account specified in the 'subject' param will be returned.
  // includeMuted: By default, we don't include muted subjects in the results. Set this to true to include them.
  // lastReviewedBy: Get all subject statuses that were reviewed by a specific moderator
  // onlyMuted: When set to true, only muted subjects and reporters will be returned.
+ // queueCount: Number of queues being used by moderators. Subjects will be split among all queues.
+ // queueIndex: Index of the queue to fetch subjects from. Works only when queueCount value is specified.
+ // queueSeed: A seeder to shuffle/balance the queue items.
  // reportedAfter: Search subjects reported after a given timestamp
  // reportedBefore: Search subjects reported before a given timestamp
  // reviewState: Specify when fetching subjects in a certain state
···
  // subject: The subject to get the status for.
  // subjectType: If specified, subjects of the given type (account or record) will be returned. When this is set to 'account' the 'collections' parameter will be ignored. When includeAllUserRecords or subject is set, this will be ignored.
  // takendown: Get subjects that were taken down
- func ModerationQueryStatuses(ctx context.Context, c *xrpc.Client, appealed bool, collections []string, comment string, cursor string, excludeTags []string, ignoreSubjects []string, includeAllUserRecords bool, includeMuted bool, lastReviewedBy string, limit int64, onlyMuted bool, reportedAfter string, reportedBefore string, reviewState string, reviewedAfter string, reviewedBefore string, sortDirection string, sortField string, subject string, subjectType string, tags []string, takendown bool) (*ModerationQueryStatuses_Output, error) {
+ func ModerationQueryStatuses(ctx context.Context, c *xrpc.Client, appealed bool, collections []string, comment string, cursor string, excludeTags []string, hostingDeletedAfter string, hostingDeletedBefore string, hostingStatuses []string, hostingUpdatedAfter string, hostingUpdatedBefore string, ignoreSubjects []string, includeAllUserRecords bool, includeMuted bool, lastReviewedBy string, limit int64, onlyMuted bool, queueCount int64, queueIndex int64, queueSeed string, reportedAfter string, reportedBefore string, reviewState string, reviewedAfter string, reviewedBefore string, sortDirection string, sortField string, subject string, subjectType string, tags []string, takendown bool) (*ModerationQueryStatuses_Output, error) {
      var out ModerationQueryStatuses_Output

      params := map[string]interface{}{
···
          "comment":               comment,
          "cursor":                cursor,
          "excludeTags":           excludeTags,
+         "hostingDeletedAfter":   hostingDeletedAfter,
+         "hostingDeletedBefore":  hostingDeletedBefore,
+         "hostingStatuses":       hostingStatuses,
+         "hostingUpdatedAfter":   hostingUpdatedAfter,
+         "hostingUpdatedBefore":  hostingUpdatedBefore,
          "ignoreSubjects":        ignoreSubjects,
          "includeAllUserRecords": includeAllUserRecords,
          "includeMuted":          includeMuted,
          "lastReviewedBy":        lastReviewedBy,
          "limit":                 limit,
          "onlyMuted":             onlyMuted,
+         "queueCount":            queueCount,
+         "queueIndex":            queueIndex,
+         "queueSeed":             queueSeed,
          "reportedAfter":         reportedAfter,
          "reportedBefore":        reportedBefore,
          "reviewState":           reviewState,
+23
api/ozone/settingdefs.go
// Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT.

package ozone

// schema: tools.ozone.setting.defs

import (
	"github.com/bluesky-social/indigo/lex/util"
)

// SettingDefs_Option is a "option" in the tools.ozone.setting.defs schema.
type SettingDefs_Option struct {
	CreatedAt *string `json:"createdAt,omitempty" cborgen:"createdAt,omitempty"`
	CreatedBy string `json:"createdBy" cborgen:"createdBy"`
	Description *string `json:"description,omitempty" cborgen:"description,omitempty"`
	Did string `json:"did" cborgen:"did"`
	Key string `json:"key" cborgen:"key"`
	LastUpdatedBy string `json:"lastUpdatedBy" cborgen:"lastUpdatedBy"`
	ManagerRole *string `json:"managerRole,omitempty" cborgen:"managerRole,omitempty"`
	Scope string `json:"scope" cborgen:"scope"`
	UpdatedAt *string `json:"updatedAt,omitempty" cborgen:"updatedAt,omitempty"`
	Value *util.LexiconTypeDecoder `json:"value" cborgen:"value"`
}
+38
api/ozone/settinglistOptions.go
··· 1 + // Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT. 2 + 3 + package ozone 4 + 5 + // schema: tools.ozone.setting.listOptions 6 + 7 + import ( 8 + "context" 9 + 10 + "github.com/bluesky-social/indigo/xrpc" 11 + ) 12 + 13 + // SettingListOptions_Output is the output of a tools.ozone.setting.listOptions call. 14 + type SettingListOptions_Output struct { 15 + Cursor *string `json:"cursor,omitempty" cborgen:"cursor,omitempty"` 16 + Options []*SettingDefs_Option `json:"options" cborgen:"options"` 17 + } 18 + 19 + // SettingListOptions calls the XRPC method "tools.ozone.setting.listOptions". 20 + // 21 + // keys: Filter for only the specified keys. Ignored if prefix is provided 22 + // prefix: Filter keys by prefix 23 + func SettingListOptions(ctx context.Context, c *xrpc.Client, cursor string, keys []string, limit int64, prefix string, scope string) (*SettingListOptions_Output, error) { 24 + var out SettingListOptions_Output 25 + 26 + params := map[string]interface{}{ 27 + "cursor": cursor, 28 + "keys": keys, 29 + "limit": limit, 30 + "prefix": prefix, 31 + "scope": scope, 32 + } 33 + if err := c.Do(ctx, xrpc.Query, "", "tools.ozone.setting.listOptions", params, nil, &out); err != nil { 34 + return nil, err 35 + } 36 + 37 + return &out, nil 38 + }
+31
api/ozone/settingremoveOptions.go
// Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT.

package ozone

// schema: tools.ozone.setting.removeOptions

import (
	"context"

	"github.com/bluesky-social/indigo/xrpc"
)

// SettingRemoveOptions_Input is the input argument to a tools.ozone.setting.removeOptions call.
type SettingRemoveOptions_Input struct {
	Keys []string `json:"keys" cborgen:"keys"`
	Scope string `json:"scope" cborgen:"scope"`
}

// SettingRemoveOptions_Output is the output of a tools.ozone.setting.removeOptions call.
type SettingRemoveOptions_Output struct {
}

// SettingRemoveOptions calls the XRPC method "tools.ozone.setting.removeOptions".
func SettingRemoveOptions(ctx context.Context, c *xrpc.Client, input *SettingRemoveOptions_Input) (*SettingRemoveOptions_Output, error) {
	var out SettingRemoveOptions_Output
	if err := c.Do(ctx, xrpc.Procedure, "application/json", "tools.ozone.setting.removeOptions", nil, input, &out); err != nil {
		return nil, err
	}

	return &out, nil
}
+36
api/ozone/settingupsertOption.go
··· 1 + // Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT. 2 + 3 + package ozone 4 + 5 + // schema: tools.ozone.setting.upsertOption 6 + 7 + import ( 8 + "context" 9 + 10 + "github.com/bluesky-social/indigo/lex/util" 11 + "github.com/bluesky-social/indigo/xrpc" 12 + ) 13 + 14 + // SettingUpsertOption_Input is the input argument to a tools.ozone.setting.upsertOption call. 15 + type SettingUpsertOption_Input struct { 16 + Description *string `json:"description,omitempty" cborgen:"description,omitempty"` 17 + Key string `json:"key" cborgen:"key"` 18 + ManagerRole *string `json:"managerRole,omitempty" cborgen:"managerRole,omitempty"` 19 + Scope string `json:"scope" cborgen:"scope"` 20 + Value *util.LexiconTypeDecoder `json:"value" cborgen:"value"` 21 + } 22 + 23 + // SettingUpsertOption_Output is the output of a tools.ozone.setting.upsertOption call. 24 + type SettingUpsertOption_Output struct { 25 + Option *SettingDefs_Option `json:"option" cborgen:"option"` 26 + } 27 + 28 + // SettingUpsertOption calls the XRPC method "tools.ozone.setting.upsertOption". 29 + func SettingUpsertOption(ctx context.Context, c *xrpc.Client, input *SettingUpsertOption_Input) (*SettingUpsertOption_Output, error) { 30 + var out SettingUpsertOption_Output 31 + if err := c.Do(ctx, xrpc.Procedure, "application/json", "tools.ozone.setting.upsertOption", nil, input, &out); err != nil { 32 + return nil, err 33 + } 34 + 35 + return &out, nil 36 + }
+2 -8
atproto/data/data.go
···
          out = append(out, v)
      case []any:
          for _, el := range v {
-             down := extractBlobsAtom(el)
-             for _, d := range down {
-                 out = append(out, d)
-             }
+             out = append(out, extractBlobsAtom(el)...)
          }
      case map[string]any:
          for _, val := range v {
-             down := extractBlobsAtom(val)
-             for _, d := range down {
-                 out = append(out, d)
-             }
+             out = append(out, extractBlobsAtom(val)...)
          }
      default:
      }
+4 -3
atproto/identity/base_directory.go
···
      ident := ParseIdentity(doc)
      declared, err := ident.DeclaredHandle()
      if err != nil {
-         return nil, err
+         return nil, fmt.Errorf("could not verify handle/DID match: %w", err)
      }
      if declared != h {
-         return nil, ErrHandleMismatch
+         return nil, fmt.Errorf("%w: %s != %s", ErrHandleMismatch, declared, h)
      }
      ident.Handle = declared

···
      if errors.Is(err, ErrHandleNotDeclared) {
          ident.Handle = syntax.HandleInvalid
      } else if err != nil {
-         return nil, err
+         return nil, fmt.Errorf("could not parse handle from DID document: %w", err)
      } else {
          // if a handle was declared, resolve it
          resolvedDID, err := d.ResolveHandle(ctx, declared)
···
  }

  func (d *BaseDirectory) Purge(ctx context.Context, a syntax.AtIdentifier) error {
+     // BaseDirectory itself does not implement caching
      return nil
  }
+3 -3
atproto/identity/cache_directory.go
···

  func (d *CacheDirectory) ResolveHandle(ctx context.Context, h syntax.Handle) (syntax.DID, error) {
      if h.IsInvalidHandle() {
-         return "", fmt.Errorf("invalid handle")
+         return "", fmt.Errorf("can not resolve handle: %w", ErrInvalidHandle)
      }
      entry, ok := d.handleCache.Get(h)
      if ok && !d.IsHandleStale(&entry) {
···

      declared, err := ident.DeclaredHandle()
      if err != nil {
-         return nil, hit, err
+         return nil, hit, fmt.Errorf("could not verify handle/DID mapping: %w", err)
      }
      if declared != h {
-         return nil, hit, ErrHandleMismatch
+         return nil, hit, fmt.Errorf("%w: %s != %s", ErrHandleMismatch, declared, h)
      }
      return ident, hit, nil
  }
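These changes wrap the sentinel errors instead of returning them bare, so callers that match with errors.Is keep working while getting more context in the message. A minimal sketch of that property, with placeholder handles:

package main

import (
	"errors"
	"fmt"

	"github.com/bluesky-social/indigo/atproto/identity"
)

func main() {
	// mimic the wrapping style used in the diff above
	err := fmt.Errorf("%w: %s != %s", identity.ErrHandleMismatch, "alice.example.com", "bob.example.com")

	// matching on the sentinel still works, but the message is now more descriptive
	fmt.Println(errors.Is(err, identity.ErrHandleMismatch)) // true
	fmt.Println(err)
}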
-20
atproto/identity/did.go
···
      "github.com/bluesky-social/indigo/atproto/syntax"
  )

- type DIDDocument struct {
-     DID syntax.DID `json:"id"`
-     AlsoKnownAs []string `json:"alsoKnownAs,omitempty"`
-     VerificationMethod []DocVerificationMethod `json:"verificationMethod,omitempty"`
-     Service []DocService `json:"service,omitempty"`
- }
-
- type DocVerificationMethod struct {
-     ID string `json:"id"`
-     Type string `json:"type"`
-     Controller string `json:"controller"`
-     PublicKeyMultibase string `json:"publicKeyMultibase"`
- }
-
- type DocService struct {
-     ID string `json:"id"`
-     Type string `json:"type"`
-     ServiceEndpoint string `json:"serviceEndpoint"`
- }
-
  // WARNING: this does *not* bi-directionally verify account metadata; it only implements direct DID-to-DID-document lookup for the supported DID methods, and parses the resulting DID Doc into an Identity struct
  func (d *BaseDirectory) ResolveDID(ctx context.Context, did syntax.DID) (*DIDDocument, error) {
      start := time.Now()
atproto/identity/did_test.go → atproto/identity/diddoc_test.go (renamed)
+25
atproto/identity/diddoc.go
package identity

import (
	"github.com/bluesky-social/indigo/atproto/syntax"
)

type DIDDocument struct {
	DID syntax.DID `json:"id"`
	AlsoKnownAs []string `json:"alsoKnownAs,omitempty"`
	VerificationMethod []DocVerificationMethod `json:"verificationMethod,omitempty"`
	Service []DocService `json:"service,omitempty"`
}

type DocVerificationMethod struct {
	ID string `json:"id"`
	Type string `json:"type"`
	Controller string `json:"controller"`
	PublicKeyMultibase string `json:"publicKeyMultibase"`
}

type DocService struct {
	ID string `json:"id"`
	Type string `json:"type"`
	ServiceEndpoint string `json:"serviceEndpoint"`
}
+84
atproto/identity/directory.go
··· 1 + package identity 2 + 3 + import ( 4 + "context" 5 + "errors" 6 + "net" 7 + "net/http" 8 + "time" 9 + 10 + "github.com/bluesky-social/indigo/atproto/syntax" 11 + ) 12 + 13 + // API for doing account lookups by DID or handle, with bi-directional verification handled automatically. Almost all atproto services and clients should use an implementation of this interface instead of resolving handles or DIDs separately 14 + // 15 + // Handles which fail to resolve, or don't match DID alsoKnownAs, are an error. DIDs which resolve but the handle does not resolve back to the DID return an Identity where the Handle is the special `handle.invalid` value. 16 + // 17 + // Some example implementations of this interface could be: 18 + // - basic direct resolution on every call 19 + // - local in-memory caching layer to reduce network hits 20 + // - API client, which just makes requests to PDS (or other remote service) 21 + // - client for shared network cache (eg, Redis) 22 + type Directory interface { 23 + LookupHandle(ctx context.Context, h syntax.Handle) (*Identity, error) 24 + LookupDID(ctx context.Context, d syntax.DID) (*Identity, error) 25 + Lookup(ctx context.Context, i syntax.AtIdentifier) (*Identity, error) 26 + 27 + // Flushes any cache of the indicated identifier. If directory is not using caching, can ignore this. 28 + Purge(ctx context.Context, i syntax.AtIdentifier) error 29 + } 30 + 31 + // Indicates that handle resolution failed. A wrapped error may provide more context. This is only returned when looking up a handle, not when looking up a DID. 32 + var ErrHandleResolutionFailed = errors.New("handle resolution failed") 33 + 34 + // Indicates that resolution process completed successfully, but handle does not exist. This is only returned when looking up a handle, not when looking up a DID. 35 + var ErrHandleNotFound = errors.New("handle not found") 36 + 37 + // Indicates that resolution process completed successfully, handle mapped to a different DID. This is only returned when looking up a handle, not when looking up a DID. 38 + var ErrHandleMismatch = errors.New("handle/DID mismatch") 39 + 40 + // Indicates that DID document did not include any handle ("alsoKnownAs"). This is only returned when looking up a handle, not when looking up a DID. 41 + var ErrHandleNotDeclared = errors.New("DID document did not declare a handle") 42 + 43 + // Handle top-level domain (TLD) is one of the special "Reserved" suffixes, and not allowed for atproto use 44 + var ErrHandleReservedTLD = errors.New("handle top-level domain is disallowed") 45 + 46 + // Indicates that resolution process completed successfully, but the DID does not exist. 47 + var ErrDIDNotFound = errors.New("DID not found") 48 + 49 + // Indicates that DID resolution process failed. A wrapped error may provide more context. 50 + var ErrDIDResolutionFailed = errors.New("DID resolution failed") 51 + 52 + // Indicates that DID document did not include a public key with the specified ID 53 + var ErrKeyNotDeclared = errors.New("DID document did not declare a relevant public key") 54 + 55 + // Handle was invalid, in a situation where a valid handle is required. 
56 + var ErrInvalidHandle = errors.New("Invalid Handle") 57 + 58 + var DefaultPLCURL = "https://plc.directory" 59 + 60 + // Returns a reasonable Directory implementation for applications 61 + func DefaultDirectory() Directory { 62 + base := BaseDirectory{ 63 + PLCURL: DefaultPLCURL, 64 + HTTPClient: http.Client{ 65 + Timeout: time.Second * 10, 66 + Transport: &http.Transport{ 67 + // would want this around 100ms for services doing lots of handle resolution. Impacts PLC connections as well, but not too bad. 68 + IdleConnTimeout: time.Millisecond * 1000, 69 + MaxIdleConns: 100, 70 + }, 71 + }, 72 + Resolver: net.Resolver{ 73 + Dial: func(ctx context.Context, network, address string) (net.Conn, error) { 74 + d := net.Dialer{Timeout: time.Second * 3} 75 + return d.DialContext(ctx, network, address) 76 + }, 77 + }, 78 + TryAuthoritativeDNS: true, 79 + // primary Bluesky PDS instance only supports HTTP resolution method 80 + SkipDNSDomainSuffixes: []string{".bsky.social"}, 81 + } 82 + cached := NewCacheDirectory(&base, 250_000, time.Hour*24, time.Minute*2, time.Minute*5) 83 + return &cached 84 + }
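DefaultDirectory wraps a BaseDirectory in the caching layer, so typical callers only touch the Directory interface above. A short lookup sketch, assuming the returned Identity exposes DID and Handle fields as in the rest of the package (the handle value is illustrative):

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/bluesky-social/indigo/atproto/identity"
	"github.com/bluesky-social/indigo/atproto/syntax"
)

func main() {
	ctx := context.Background()
	dir := identity.DefaultDirectory()

	// handle value is illustrative
	ident, err := dir.LookupHandle(ctx, syntax.Handle("atproto.com"))
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(ident.DID, ident.Handle)
}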
+8 -4
atproto/identity/handle.go
··· 35 35 var dnsErr *net.DNSError 36 36 if errors.As(err, &dnsErr) { 37 37 if dnsErr.IsNotFound { 38 - return "", ErrHandleNotFound 38 + return "", fmt.Errorf("%w: %s", ErrHandleNotFound, handle) 39 39 } 40 40 } 41 41 if err != nil { ··· 138 138 var dnsErr *net.DNSError 139 139 if errors.As(err, &dnsErr) { 140 140 if dnsErr.IsNotFound { 141 - return "", fmt.Errorf("%w: DNS NXDOMAIN for %s", ErrHandleNotFound, handle) 141 + return "", fmt.Errorf("%w: DNS NXDOMAIN for HTTP well-known resolution of %s", ErrHandleNotFound, handle) 142 142 } 143 143 } 144 144 return "", fmt.Errorf("%w: HTTP well-known request error: %w", ErrHandleResolutionFailed, err) ··· 160 160 return "", fmt.Errorf("%w: HTTP well-known body read for %s: %w", ErrHandleResolutionFailed, handle, err) 161 161 } 162 162 line := strings.TrimSpace(string(b)) 163 - return syntax.ParseDID(line) 163 + outDid, err := syntax.ParseDID(line) 164 + if err != nil { 165 + return outDid, fmt.Errorf("%w: invalid DID in HTTP well-known for %s", ErrHandleResolutionFailed, handle) 166 + } 167 + return outDid, err 164 168 } 165 169 166 170 func (d *BaseDirectory) ResolveHandle(ctx context.Context, handle syntax.Handle) (syntax.DID, error) { ··· 169 173 var did syntax.DID 170 174 171 175 if handle.IsInvalidHandle() { 172 - return "", fmt.Errorf("invalid handle") 176 + return "", fmt.Errorf("can not resolve handle: %w", ErrInvalidHandle) 173 177 } 174 178 175 179 if !handle.AllowedTLD() {
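Since the resolution errors are now wrapped with %w, callers can branch on the sentinel values declared in directory.go. A sketch of that pattern, assuming LookupHandle surfaces the wrapped ResolveHandle errors (the handle value is illustrative):

package main

import (
	"context"
	"errors"
	"fmt"

	"github.com/bluesky-social/indigo/atproto/identity"
	"github.com/bluesky-social/indigo/atproto/syntax"
)

func main() {
	dir := identity.DefaultDirectory()
	// handle value is illustrative
	_, err := dir.LookupHandle(context.Background(), syntax.Handle("no-such-handle.example.com"))
	switch {
	case err == nil:
		fmt.Println("resolved")
	case errors.Is(err, identity.ErrHandleNotFound):
		fmt.Println("handle does not exist")
	case errors.Is(err, identity.ErrHandleResolutionFailed):
		fmt.Println("resolution failed; may be transient")
	default:
		fmt.Println("lookup error:", err)
	}
}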
-75
atproto/identity/identity.go
··· 1 1 package identity 2 2 3 3 import ( 4 - "context" 5 - "errors" 6 4 "fmt" 7 - "net" 8 - "net/http" 9 5 "net/url" 10 6 "strings" 11 - "time" 12 7 13 8 "github.com/bluesky-social/indigo/atproto/crypto" 14 9 "github.com/bluesky-social/indigo/atproto/syntax" 15 10 16 11 "github.com/mr-tron/base58" 17 12 ) 18 - 19 - // API for doing account lookups by DID or handle, with bi-directional verification handled automatically. Almost all atproto services and clients should use an implementation of this interface instead of resolving handles or DIDs separately 20 - // 21 - // Handles which fail to resolve, or don't match DID alsoKnownAs, are an error. DIDs which resolve but the handle does not resolve back to the DID return an Identity where the Handle is the special `handle.invalid` value. 22 - // 23 - // Some example implementations of this interface could be: 24 - // - basic direct resolution on every call 25 - // - local in-memory caching layer to reduce network hits 26 - // - API client, which just makes requests to PDS (or other remote service) 27 - // - client for shared network cache (eg, Redis) 28 - type Directory interface { 29 - LookupHandle(ctx context.Context, h syntax.Handle) (*Identity, error) 30 - LookupDID(ctx context.Context, d syntax.DID) (*Identity, error) 31 - Lookup(ctx context.Context, i syntax.AtIdentifier) (*Identity, error) 32 - 33 - // Flushes any cache of the indicated identifier. If directory is not using caching, can ignore this. 34 - Purge(ctx context.Context, i syntax.AtIdentifier) error 35 - } 36 - 37 - // Indicates that handle resolution failed. A wrapped error may provide more context. This is only returned when looking up a handle, not when looking up a DID. 38 - var ErrHandleResolutionFailed = errors.New("handle resolution failed") 39 - 40 - // Indicates that resolution process completed successfully, but handle does not exist. This is only returned when looking up a handle, not when looking up a DID. 41 - var ErrHandleNotFound = errors.New("handle not found") 42 - 43 - // Indicates that resolution process completed successfully, handle mapped to a different DID. This is only returned when looking up a handle, not when looking up a DID. 44 - var ErrHandleMismatch = errors.New("handle/DID mismatch") 45 - 46 - // Indicates that DID document did not include any handle ("alsoKnownAs"). This is only returned when looking up a handle, not when looking up a DID. 47 - var ErrHandleNotDeclared = errors.New("DID document did not declare a handle") 48 - 49 - // Handle top-level domain (TLD) is one of the special "Reserved" suffixes, and not allowed for atproto use 50 - var ErrHandleReservedTLD = errors.New("handle top-level domain is disallowed") 51 - 52 - // Indicates that resolution process completed successfully, but the DID does not exist. 53 - var ErrDIDNotFound = errors.New("DID not found") 54 - 55 - // Indicates that DID resolution process failed. A wrapped error may provide more context. 
56 - var ErrDIDResolutionFailed = errors.New("DID resolution failed") 57 - 58 - // Indicates that DID document did not include a public key with the specified ID 59 - var ErrKeyNotDeclared = errors.New("DID document did not declare a relevant public key") 60 - 61 - var DefaultPLCURL = "https://plc.directory" 62 - 63 - // Returns a reasonable Directory implementation for applications 64 - func DefaultDirectory() Directory { 65 - base := BaseDirectory{ 66 - PLCURL: DefaultPLCURL, 67 - HTTPClient: http.Client{ 68 - Timeout: time.Second * 10, 69 - Transport: &http.Transport{ 70 - // would want this around 100ms for services doing lots of handle resolution. Impacts PLC connections as well, but not too bad. 71 - IdleConnTimeout: time.Millisecond * 1000, 72 - MaxIdleConns: 100, 73 - }, 74 - }, 75 - Resolver: net.Resolver{ 76 - Dial: func(ctx context.Context, network, address string) (net.Conn, error) { 77 - d := net.Dialer{Timeout: time.Second * 3} 78 - return d.DialContext(ctx, network, address) 79 - }, 80 - }, 81 - TryAuthoritativeDNS: true, 82 - // primary Bluesky PDS instance only supports HTTP resolution method 83 - SkipDNSDomainSuffixes: []string{".bsky.social"}, 84 - } 85 - cached := NewCacheDirectory(&base, 250_000, time.Hour*24, time.Minute*2, time.Minute*5) 86 - return &cached 87 - } 88 13 89 14 // Represents an atproto identity. Could be a regular user account, or a service account (eg, feed generator) 90 15 type Identity struct {
+12 -12
atproto/identity/redisdir/redis_directory.go
··· 57 57 func NewRedisDirectory(inner identity.Directory, redisURL string, hitTTL, errTTL, invalidHandleTTL time.Duration, lruSize int) (*RedisDirectory, error) { 58 58 opt, err := redis.ParseURL(redisURL) 59 59 if err != nil { 60 - return nil, err 60 + return nil, fmt.Errorf("could not configure redis identity cache: %w", err) 61 61 } 62 62 rdb := redis.NewClient(opt) 63 63 // check redis connection 64 64 _, err = rdb.Ping(context.TODO()).Result() 65 65 if err != nil { 66 - return nil, err 66 + return nil, fmt.Errorf("could not connect to redis identity cache: %w", err) 67 67 } 68 68 handleCache := cache.New(&cache.Options{ 69 69 Redis: rdb, ··· 117 117 }) 118 118 if err != nil { 119 119 he.DID = nil 120 - he.Err = fmt.Errorf("identity cache write: %w", err) 120 + he.Err = fmt.Errorf("identity cache write failed: %w", err) 121 121 return he 122 122 } 123 123 return he ··· 142 142 }) 143 143 if err != nil { 144 144 he.DID = nil 145 - he.Err = fmt.Errorf("identity cache write: %w", err) 145 + he.Err = fmt.Errorf("identity cache write failed: %w", err) 146 146 return he 147 147 } 148 148 err = d.handleCache.Set(&cache.Item{ ··· 153 153 }) 154 154 if err != nil { 155 155 he.DID = nil 156 - he.Err = fmt.Errorf("identity cache write: %w", err) 156 + he.Err = fmt.Errorf("identity cache write failed: %w", err) 157 157 return he 158 158 } 159 159 return he ··· 161 161 162 162 func (d *RedisDirectory) ResolveHandle(ctx context.Context, h syntax.Handle) (syntax.DID, error) { 163 163 if h.IsInvalidHandle() { 164 - return "", errors.New("invalid handle") 164 + return "", fmt.Errorf("can not resolve handle: %w", identity.ErrInvalidHandle) 165 165 } 166 166 var entry handleEntry 167 167 err := d.handleCache.Get(ctx, redisDirPrefix+h.String(), &entry) 168 168 if err != nil && err != cache.ErrCacheMiss { 169 - return "", fmt.Errorf("identity cache read: %w", err) 169 + return "", fmt.Errorf("identity cache read failed: %w", err) 170 170 } 171 171 if err == nil && !d.isHandleStale(&entry) { // if no error... 172 172 handleCacheHits.Inc() ··· 191 191 // The result should now be in the cache 192 192 err := d.handleCache.Get(ctx, redisDirPrefix+h.String(), entry) 193 193 if err != nil && err != cache.ErrCacheMiss { 194 - return "", fmt.Errorf("identity cache read: %w", err) 194 + return "", fmt.Errorf("identity cache read failed: %w", err) 195 195 } 196 196 if err == nil && !d.isHandleStale(&entry) { // if no error... 197 197 if entry.Err != nil { ··· 251 251 }) 252 252 if err != nil { 253 253 entry.Identity = nil 254 - entry.Err = fmt.Errorf("identity cache write: %v", err) 254 + entry.Err = fmt.Errorf("identity cache write failed: %w", err) 255 255 return entry 256 256 } 257 257 if he != nil { ··· 263 263 }) 264 264 if err != nil { 265 265 entry.Identity = nil 266 - entry.Err = fmt.Errorf("identity cache write: %v", err) 266 + entry.Err = fmt.Errorf("identity cache write failed: %w", err) 267 267 return entry 268 268 } 269 269 } ··· 279 279 var entry identityEntry 280 280 err := d.identityCache.Get(ctx, redisDirPrefix+did.String(), &entry) 281 281 if err != nil && err != cache.ErrCacheMiss { 282 - return nil, false, fmt.Errorf("identity cache read: %v", err) 282 + return nil, false, fmt.Errorf("identity cache read failed: %w", err) 283 283 } 284 284 if err == nil && !d.isIdentityStale(&entry) { // if no error... 
285 285 identityCacheHits.Inc() ··· 298 298 // The result should now be in the cache 299 299 err = d.identityCache.Get(ctx, redisDirPrefix+did.String(), &entry) 300 300 if err != nil && err != cache.ErrCacheMiss { 301 - return nil, false, fmt.Errorf("identity cache read: %v", err) 301 + return nil, false, fmt.Errorf("identity cache read failed: %w", err) 302 302 } 303 303 if err == nil && !d.isIdentityStale(&entry) { // if no error... 304 304 return entry.Identity, false, entry.Err
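For deployments with several workers sharing one cache, the redisdir wrapper follows the constructor shown above. A sketch of wiring it in front of a BaseDirectory, assuming *BaseDirectory satisfies identity.Directory as in the upstream package (the Redis URL, TTLs, and LRU size are illustrative, not recommendations):

package main

import (
	"log"
	"time"

	"github.com/bluesky-social/indigo/atproto/identity"
	"github.com/bluesky-social/indigo/atproto/identity/redisdir"
)

func main() {
	// inner directory does the uncached resolution work
	base := identity.BaseDirectory{PLCURL: identity.DefaultPLCURL}

	// args: inner, redisURL, hitTTL, errTTL, invalidHandleTTL, lruSize (per the constructor above)
	dir, err := redisdir.NewRedisDirectory(&base, "redis://localhost:6379/0", time.Hour*24, time.Minute*2, time.Minute*5, 10_000)
	if err != nil {
		log.Fatal(err)
	}
	_ = dir // use like any other identity.Directory
}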
+1 -1
atproto/lexicon/language.go
··· 296 296 297 297 func (s *SchemaRecord) CheckSchema() error { 298 298 switch s.Key { 299 - case "tid", "any": 299 + case "tid", "nsid", "any": 300 300 // pass 301 301 default: 302 302 if !strings.HasPrefix(s.Key, "literal:") {
+21
atproto/syntax/cmd/atp-syntax/main.go
··· 22 22 ArgsUsage: "<tid>", 23 23 Action: runParseTID, 24 24 }, 25 + &cli.Command{ 26 + Name: "parse-did", 27 + Usage: "parse a DID", 28 + ArgsUsage: "<did>", 29 + Action: runParseDID, 30 + }, 25 31 } 26 32 h := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}) 27 33 slog.SetDefault(slog.New(h)) ··· 43 49 44 50 return nil 45 51 } 52 + 53 + func runParseDID(cctx *cli.Context) error { 54 + s := cctx.Args().First() 55 + if s == "" { 56 + return fmt.Errorf("need to provide identifier as an argument") 57 + } 58 + 59 + did, err := syntax.ParseDID(s) 60 + if err != nil { 61 + return err 62 + } 63 + fmt.Printf("%s\n", did) 64 + 65 + return nil 66 + }
+22
atproto/syntax/did.go
··· 14 14 type DID string 15 15 16 16 var didRegex = regexp.MustCompile(`^did:[a-z]+:[a-zA-Z0-9._:%-]*[a-zA-Z0-9._-]$`) 17 + var plcChars = "" 18 + 19 + func isASCIIAlphaNum(c rune) bool { 20 + if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') { 21 + return true 22 + } 23 + return false 24 + } 17 25 18 26 func ParseDID(raw string) (DID, error) { 27 + // fast-path for did:plc, avoiding regex 28 + if len(raw) == 32 && strings.HasPrefix(raw, "did:plc:") { 29 + // NOTE: this doesn't really check base32, just broader alphanumeric. might pass invalid PLC DIDs, but they still have overall valid DID syntax 30 + isPlc := true 31 + for _, c := range raw[8:32] { 32 + if !isASCIIAlphaNum(c) { 33 + isPlc = false 34 + break 35 + } 36 + } 37 + if isPlc { 38 + return DID(raw), nil 39 + } 40 + } 19 41 if raw == "" { 20 42 return "", errors.New("expected DID, got empty string") 21 43 }
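The fast path only fires for 32-character did:plc identifiers; everything else still goes through the regex. A small usage sketch (the DID values are illustrative):

package main

import (
	"fmt"
	"log"

	"github.com/bluesky-social/indigo/atproto/syntax"
)

func main() {
	// 8-char prefix plus 24 characters: exactly 32, so this takes the fast path
	did, err := syntax.ParseDID("did:plc:ewvi7nxzyoun6zhxrhs64oiz")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(did)

	// anything else (did:web, unusual lengths) still goes through the regex
	if _, err := syntax.ParseDID("did:web:example.com"); err != nil {
		log.Fatal(err)
	}
}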
+26
atproto/syntax/path.go
··· 1 + package syntax 2 + 3 + import ( 4 + "errors" 5 + "fmt" 6 + "strings" 7 + ) 8 + 9 + // Parses an atproto repo path string into "collection" (NSID) and record key parts. 10 + // 11 + // Does not return partial success: either both collection and record key are complete (and error is nil), or both are empty string (and error is not nil) 12 + func ParseRepoPath(raw string) (NSID, RecordKey, error) { 13 + parts := strings.SplitN(raw, "/", 3) 14 + if len(parts) != 2 { 15 + return "", "", errors.New("expected path to have two parts, separated by single slash") 16 + } 17 + nsid, err := ParseNSID(parts[0]) 18 + if err != nil { 19 + return "", "", fmt.Errorf("collection part of path not a valid NSID: %w", err) 20 + } 21 + rkey, err := ParseRecordKey(parts[1]) 22 + if err != nil { 23 + return "", "", fmt.Errorf("record key part of path not valid: %w", err) 24 + } 25 + return nsid, rkey, nil 26 + }
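ParseRepoPath takes over from the ad-hoc splitRepoPath helper removed from automod/consumer below. A minimal usage sketch (the path value is illustrative):

package main

import (
	"fmt"
	"log"

	"github.com/bluesky-social/indigo/atproto/syntax"
)

func main() {
	// path value is illustrative: collection NSID, then record key
	collection, rkey, err := syntax.ParseRepoPath("app.bsky.feed.post/3kabc123xyz2a")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("collection=%s rkey=%s\n", collection, rkey)
}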
+41
atproto/syntax/path_test.go
··· 1 + package syntax 2 + 3 + import ( 4 + "testing" 5 + 6 + "github.com/stretchr/testify/assert" 7 + ) 8 + 9 + func TestRepoPath(t *testing.T) { 10 + assert := assert.New(t) 11 + 12 + testValid := [][]string{ 13 + {"app.bsky.feed.post/asdf", "app.bsky.feed.post", "asdf"}, 14 + } 15 + 16 + testErr := []string{ 17 + "", 18 + "/", 19 + "/app.bsky.feed.post/asdf", 20 + "/asdf", 21 + "./app.bsky.feed.post", 22 + "blob/asdf", 23 + "app.bsky.feed.post/", 24 + "app.bsky.feed.post/.", 25 + "app.bsky.feed.post/!", 26 + } 27 + 28 + for _, parts := range testValid { 29 + nsid, rkey, err := ParseRepoPath(parts[0]) 30 + assert.NoError(err) 31 + assert.Equal(parts[1], nsid.String()) 32 + assert.Equal(parts[2], rkey.String()) 33 + } 34 + 35 + for _, raw := range testErr { 36 + nsid, rkey, err := ParseRepoPath(raw) 37 + assert.Error(err) 38 + assert.Equal("", nsid.String()) 39 + assert.Equal("", rkey.String()) 40 + } 41 + }
+3 -3
automod/consumer/firehose.go
··· 116 116 fc.Logger.Info("hepa scheduler configured", "scheduler", "autoscaling", "initial", scaleSettings.Concurrency, "max", scaleSettings.MaxConcurrency) 117 117 } 118 118 119 - return events.HandleRepoStream(ctx, con, scheduler) 119 + return events.HandleRepoStream(ctx, con, scheduler, fc.Logger) 120 120 } 121 121 122 122 // NOTE: for now, this function basically never errors, just logs and returns nil. Should think through error processing better. ··· 144 144 145 145 for _, op := range evt.Ops { 146 146 logger = logger.With("eventKind", op.Action, "path", op.Path) 147 - collection, rkey, err := splitRepoPath(op.Path) 147 + collection, rkey, err := syntax.ParseRepoPath(op.Path) 148 148 if err != nil { 149 - logger.Error("invalid path in repo op") 149 + logger.Error("invalid path in repo op", "err", err) 150 150 return nil 151 151 } 152 152
+1
automod/consumer/ozone.go
··· 69 69 false, // hasComment bool 70 70 true, // includeAllUserRecords bool 71 71 limit, // limit int64 72 + nil, // policies []string 72 73 nil, // removedLabels []string 73 74 nil, // removedTags []string 74 75 nil, // reportTypes []string
-25
automod/consumer/util.go
··· 1 - package consumer 2 - 3 - import ( 4 - "fmt" 5 - "strings" 6 - 7 - "github.com/bluesky-social/indigo/atproto/syntax" 8 - ) 9 - 10 - // TODO: move this to a "ParsePath" helper in syntax package? 11 - func splitRepoPath(path string) (syntax.NSID, syntax.RecordKey, error) { 12 - parts := strings.SplitN(path, "/", 3) 13 - if len(parts) != 2 { 14 - return "", "", fmt.Errorf("invalid record path: %s", path) 15 - } 16 - collection, err := syntax.ParseNSID(parts[0]) 17 - if err != nil { 18 - return "", "", err 19 - } 20 - rkey, err := syntax.ParseRecordKey(parts[1]) 21 - if err != nil { 22 - return "", "", err 23 - } 24 - return collection, rkey, nil 25 - }
+4 -4
automod/engine/circuit_breaker_test.go
··· 44 44 p1cbor := p1buf.Bytes() 45 45 46 46 // generate double the quote of events; expect to only count the quote worth of actions 47 - for i := 0; i < 2*QuotaModTakedownDay; i++ { 47 + for i := 0; i < 2*eng.Config.QuotaModTakedownDay; i++ { 48 48 ident := identity.Identity{ 49 49 DID: syntax.DID(fmt.Sprintf("did:plc:abc%d", i)), 50 50 Handle: syntax.Handle("handle.example.com"), ··· 63 63 64 64 takedowns, err := eng.Counters.GetCount(ctx, "automod-quota", "takedown", countstore.PeriodDay) 65 65 assert.NoError(err) 66 - assert.Equal(QuotaModTakedownDay, takedowns) 66 + assert.Equal(eng.Config.QuotaModTakedownDay, takedowns) 67 67 68 68 reports, err := eng.Counters.GetCount(ctx, "automod-quota", "report", countstore.PeriodDay) 69 69 assert.NoError(err) ··· 89 89 p1cbor := p1buf.Bytes() 90 90 91 91 // generate double the quota of events; expect to only count the quota worth of actions 92 - for i := 0; i < 2*QuotaModReportDay; i++ { 92 + for i := 0; i < 2*eng.Config.QuotaModReportDay; i++ { 93 93 ident := identity.Identity{ 94 94 DID: syntax.DID(fmt.Sprintf("did:plc:abc%d", i)), 95 95 Handle: syntax.Handle("handle.example.com"), ··· 112 112 113 113 reports, err := eng.Counters.GetCount(ctx, "automod-quota", "report", countstore.PeriodDay) 114 114 assert.NoError(err) 115 - assert.Equal(QuotaModReportDay, reports) 115 + assert.Equal(eng.Config.QuotaModReportDay, reports) 116 116 }
-12
automod/engine/effects.go
··· 2 2 3 3 import ( 4 4 "sync" 5 - "time" 6 - ) 7 - 8 - var ( 9 - // time period within which automod will not re-report an account for the same reasonType 10 - ReportDupePeriod = 1 * 24 * time.Hour 11 - // number of reports automod can file per day, for all subjects and types combined (circuit breaker) 12 - QuotaModReportDay = 2000 13 - // number of takedowns automod can action per day, for all subjects combined (circuit breaker) 14 - QuotaModTakedownDay = 200 15 - // number of misc actions automod can do per day, for all subjects combined (circuit breaker) 16 - QuotaModActionDay = 1000 17 5 ) 18 6 19 7 type CounterRef struct {
+8
automod/engine/engine.go
··· 52 52 type EngineConfig struct { 53 53 // if enabled, account metadata is not hydrated for every event by default 54 54 SkipAccountMeta bool 55 + // time period within which automod will not re-report an account for the same reasonType 56 + ReportDupePeriod time.Duration 57 + // number of reports automod can file per day, for all subjects and types combined (circuit breaker) 58 + QuotaModReportDay int 59 + // number of takedowns automod can action per day, for all subjects combined (circuit breaker) 60 + QuotaModTakedownDay int 61 + // number of misc actions automod can do per day, for all subjects combined (circuit breaker) 62 + QuotaModActionDay int 55 63 } 56 64 57 65 // Entrypoint for external code pushing #identity events in to the engine.
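With the package-level constants removed from effects.go, the dupe-report window and circuit-breaker quotas are configured per engine. A sketch of the config literal as the tests above use it via eng.Config; the values are illustrative, and zero values fall back to the defaults applied in persisthelpers.go below:

package main

import (
	"fmt"
	"time"

	"github.com/bluesky-social/indigo/automod/engine"
)

func main() {
	// illustrative values; zero values fall back to the defaults in persisthelpers.go
	cfg := engine.EngineConfig{
		SkipAccountMeta:     false,
		ReportDupePeriod:    24 * time.Hour,
		QuotaModReportDay:   2000,
		QuotaModTakedownDay: 200,
		QuotaModActionDay:   1000,
	}
	fmt.Printf("%+v\n", cfg)
}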
+6 -4
automod/engine/fetch_account_meta.go
··· 139 139 ap.AccountTags = dedupeStrings(rd.Moderation.SubjectStatus.Tags) 140 140 if rd.Moderation.SubjectStatus.ReviewState != nil { 141 141 switch *rd.Moderation.SubjectStatus.ReviewState { 142 - case "#reviewOpen": 142 + case "tools.ozone.moderation.defs#reviewOpen": 143 143 ap.ReviewState = ReviewStateOpen 144 - case "#reviewEscalated": 144 + case "tools.ozone.moderation.defs#reviewEscalated": 145 145 ap.ReviewState = ReviewStateEscalated 146 - case "#reviewClosed": 146 + case "tools.ozone.moderation.defs#reviewClosed": 147 147 ap.ReviewState = ReviewStateClosed 148 - case "#reviewNonde": 148 + case "tools.ozone.moderation.defs#reviewNone": 149 149 ap.ReviewState = ReviewStateNone 150 + default: 151 + logger.Warn("unexpected ozone moderation review state", "state", rd.Moderation.SubjectStatus.ReviewState, "did", ident.DID) 150 152 } 151 153 } 152 154 }
+28 -5
automod/engine/persisthelpers.go
··· 98 98 if err != nil { 99 99 return nil, fmt.Errorf("checking report action quota: %w", err) 100 100 } 101 - if c >= QuotaModReportDay { 101 + 102 + quotaModReportDay := eng.Config.QuotaModReportDay 103 + if quotaModReportDay == 0 { 104 + quotaModReportDay = 10000 105 + } 106 + if c >= quotaModReportDay { 102 107 eng.Logger.Warn("CIRCUIT BREAKER: automod reports") 103 108 return []ModReport{}, nil 104 109 } ··· 117 122 if err != nil { 118 123 return false, fmt.Errorf("checking takedown action quota: %w", err) 119 124 } 120 - if c >= QuotaModTakedownDay { 125 + quotaModTakedownDay := eng.Config.QuotaModTakedownDay 126 + if quotaModTakedownDay == 0 { 127 + quotaModTakedownDay = 200 128 + } 129 + if c >= quotaModTakedownDay { 121 130 eng.Logger.Warn("CIRCUIT BREAKER: automod takedowns") 122 131 return false, nil 123 132 } ··· 137 146 if err != nil { 138 147 return false, fmt.Errorf("checking mod action quota: %w", err) 139 148 } 140 - if c >= QuotaModActionDay { 149 + quotaModActionDay := eng.Config.QuotaModActionDay 150 + if quotaModActionDay == 0 { 151 + quotaModActionDay = 2000 152 + } 153 + if c >= quotaModActionDay { 141 154 eng.Logger.Warn("CIRCUIT BREAKER: automod action") 142 155 return false, nil 143 156 } ··· 169 182 false, // hasComment bool 170 183 false, // includeAllUserRecords bool 171 184 5, // limit int64 185 + nil, // policies []string 172 186 nil, // removedLabels []string 173 187 nil, // removedTags []string 174 188 nil, // reportTypes []string ··· 191 205 if err != nil { 192 206 return false, err 193 207 } 194 - if time.Since(created.Time()) > ReportDupePeriod { 208 + reportDupePeriod := eng.Config.ReportDupePeriod 209 + if reportDupePeriod == 0 { 210 + reportDupePeriod = 1 * 24 * time.Hour 211 + } 212 + if time.Since(created.Time()) > reportDupePeriod { 195 213 continue 196 214 } 197 215 ··· 246 264 false, // hasComment bool 247 265 false, // includeAllUserRecords bool 248 266 5, // limit int64 267 + nil, // policies []string 249 268 nil, // removedLabels []string 250 269 nil, // removedTags []string 251 270 nil, // reportTypes []string ··· 267 286 if err != nil { 268 287 return false, err 269 288 } 270 - if time.Since(created.Time()) > ReportDupePeriod { 289 + reportDupePeriod := eng.Config.ReportDupePeriod 290 + if reportDupePeriod == 0 { 291 + reportDupePeriod = 1 * 24 * time.Hour 292 + } 293 + if time.Since(created.Time()) > reportDupePeriod { 271 294 continue 272 295 } 273 296
+1 -1
automod/engine/slack.go
··· 86 86 msg += fmt.Sprintf("Report `%s`: %s\n", rep.ReasonType, rep.Comment) 87 87 } 88 88 if newTakedown { 89 - msg += fmt.Sprintf("Takedown!\n") 89 + msg += "Takedown!\n" 90 90 } 91 91 return msg 92 92 }
+1 -3
automod/flagstore/flagstore_mem.go
··· 27 27 if !ok { 28 28 v = []string{} 29 29 } 30 - for _, f := range flags { 31 - v = append(v, f) 32 - } 30 + v = append(v, flags...) 33 31 v = dedupeStrings(v) 34 32 s.Data[key] = v 35 33 return nil
+1 -3
automod/helpers/bsky.go
··· 11 11 12 12 func ExtractHashtagsPost(post *appbsky.FeedPost) []string { 13 13 var tags []string 14 - for _, tag := range post.Tags { 15 - tags = append(tags, tag) 16 - } 14 + tags = append(tags, post.Tags...) 17 15 for _, facet := range post.Facets { 18 16 for _, feat := range facet.Features { 19 17 if feat.RichtextFacet_Tag != nil {
+1 -1
automod/rules/harassment.go
··· 130 130 131 131 if count > 5 { 132 132 //c.AddRecordFlag("trivial-harassing-post") 133 - c.ReportAccount(automod.ReportReasonOther, fmt.Sprintf("possible targetted harassment (also labeled; remove label if this isn't harassment!)")) 133 + c.ReportAccount(automod.ReportReasonOther, "possible targeted harassment (also labeled; remove label if this isn't harassment!)") 134 134 c.AddAccountLabel("!hide") 135 135 c.Notify("slack") 136 136 }
+1 -2
automod/rules/nostr.go
··· 1 1 package rules 2 2 3 3 import ( 4 - "fmt" 5 4 "strings" 6 5 "time" 7 6 ··· 37 36 return nil 38 37 } 39 38 40 - c.ReportAccount(automod.ReportReasonOther, fmt.Sprintf("likely nostr spam account (also labeled; remove label if this isn't spam!)")) 39 + c.ReportAccount(automod.ReportReasonOther, "likely nostr spam account (also labeled; remove label if this isn't spam!)") 41 40 c.AddAccountLabel("!hide") 42 41 c.Notify("slack") 43 42 return nil
+1 -2
automod/rules/promo.go
··· 1 1 package rules 2 2 3 3 import ( 4 - "fmt" 5 4 "net/url" 6 5 "strings" 7 6 "time" ··· 54 53 uniqueReplies := c.GetCountDistinct("reply-to", did, countstore.PeriodDay) 55 54 if uniqueReplies >= 10 { 56 55 c.AddAccountFlag("promo-multi-reply") 57 - c.ReportAccount(automod.ReportReasonSpam, fmt.Sprintf("possible aggressive self-promotion")) 56 + c.ReportAccount(automod.ReportReasonSpam, "possible aggressive self-promotion") 58 57 c.Notify("slack") 59 58 } 60 59
+2 -2
automod/rules/quick.go
··· 29 29 } 30 30 if strings.Contains(*profile.Description, "🏈🍕🌀") { 31 31 c.AddAccountFlag("profile-bot-string") 32 - c.ReportAccount(automod.ReportReasonSpam, fmt.Sprintf("possible bot based on string in profile")) 32 + c.ReportAccount(automod.ReportReasonSpam, "possible bot based on string in profile") 33 33 c.Notify("slack") 34 34 return nil 35 35 } ··· 89 89 return nil 90 90 } 91 91 92 - c.ReportAccount(automod.ReportReasonOther, fmt.Sprintf("trivial spam account (also labeled; remove label if this isn't spam!)")) 92 + c.ReportAccount(automod.ReportReasonOther, "trivial spam account (also labeled; remove label if this isn't spam!)") 93 93 c.AddAccountLabel("!hide") 94 94 c.Notify("slack") 95 95 return nil
+68 -33
backfill/backfill.go
··· 12 12 "time" 13 13 14 14 "github.com/bluesky-social/indigo/api/atproto" 15 + "github.com/bluesky-social/indigo/atproto/identity" 16 + "github.com/bluesky-social/indigo/atproto/syntax" 15 17 "github.com/bluesky-social/indigo/repo" 16 18 "github.com/bluesky-social/indigo/repomgr" 19 + 17 20 "github.com/ipfs/go-cid" 18 21 "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" 19 22 "go.opentelemetry.io/otel" ··· 71 74 ParallelRecordCreates int 72 75 // Prefix match for records to backfill i.e. app.bsky.feed.app/ 73 76 // If empty, all records will be backfilled 74 - NSIDFilter string 75 - CheckoutPath string 77 + NSIDFilter string 78 + RelayHost string 76 79 77 80 syncLimiter *rate.Limiter 78 81 ··· 80 83 magicHeaderVal string 81 84 82 85 stop chan chan struct{} 86 + 87 + Directory identity.Directory 83 88 } 84 89 85 90 var ( ··· 110 115 ParallelRecordCreates int 111 116 NSIDFilter string 112 117 SyncRequestsPerSecond int 113 - CheckoutPath string 118 + RelayHost string 114 119 } 115 120 116 121 func DefaultBackfillOptions() *BackfillOptions { ··· 119 124 ParallelRecordCreates: 100, 120 125 NSIDFilter: "", 121 126 SyncRequestsPerSecond: 2, 122 - CheckoutPath: "https://bsky.network/xrpc/com.atproto.sync.getRepo", 127 + RelayHost: "https://bsky.network", 123 128 } 124 129 } 125 130 ··· 135 140 if opts == nil { 136 141 opts = DefaultBackfillOptions() 137 142 } 143 + 144 + // Convert wss:// or ws:// to https:// or http:// 145 + if strings.HasPrefix(opts.RelayHost, "wss://") { 146 + opts.RelayHost = "https://" + opts.RelayHost[6:] 147 + } else if strings.HasPrefix(opts.RelayHost, "ws://") { 148 + opts.RelayHost = "http://" + opts.RelayHost[5:] 149 + } 150 + 138 151 return &Backfiller{ 139 152 Name: name, 140 153 Store: store, ··· 145 158 ParallelRecordCreates: opts.ParallelRecordCreates, 146 159 NSIDFilter: opts.NSIDFilter, 147 160 syncLimiter: rate.NewLimiter(rate.Limit(opts.SyncRequestsPerSecond), 1), 148 - CheckoutPath: opts.CheckoutPath, 161 + RelayHost: opts.RelayHost, 149 162 stop: make(chan chan struct{}, 1), 163 + Directory: identity.DefaultDirectory(), 150 164 } 151 165 } 152 166 ··· 292 306 err error 293 307 } 294 308 295 - // BackfillRepo backfills a repo 296 - func (b *Backfiller) BackfillRepo(ctx context.Context, job Job) (string, error) { 297 - ctx, span := tracer.Start(ctx, "BackfillRepo") 298 - defer span.End() 309 + // Fetches a repo CAR file over HTTP from the indicated host. 
If successful, parses the CAR and returns repo.Repo 310 + func (b *Backfiller) fetchRepo(ctx context.Context, did, since, host string) (*repo.Repo, error) { 311 + url := fmt.Sprintf("%s/xrpc/com.atproto.sync.getRepo?did=%s", host, did) 299 312 300 - start := time.Now() 301 - 302 - repoDid := job.Repo() 303 - 304 - log := slog.With("source", "backfiller_backfill_repo", "repo", repoDid) 305 - if job.RetryCount() > 0 { 306 - log = log.With("retry_count", job.RetryCount()) 307 - } 308 - log.Info(fmt.Sprintf("processing backfill for %s", repoDid)) 309 - 310 - url := fmt.Sprintf("%s?did=%s", b.CheckoutPath, repoDid) 311 - 312 - if job.Rev() != "" { 313 - url = url + fmt.Sprintf("&since=%s", job.Rev()) 313 + if since != "" { 314 + url = url + fmt.Sprintf("&since=%s", since) 314 315 } 315 316 316 317 // GET and CAR decode the body ··· 320 321 } 321 322 req, err := http.NewRequestWithContext(ctx, "GET", url, nil) 322 323 if err != nil { 323 - state := fmt.Sprintf("failed (create request: %s)", err.Error()) 324 - return state, fmt.Errorf("failed to create request: %w", err) 324 + return nil, fmt.Errorf("failed to create request: %w", err) 325 325 } 326 326 327 327 req.Header.Set("Accept", "application/vnd.ipld.car") ··· 334 334 335 335 resp, err := client.Do(req) 336 336 if err != nil { 337 - state := fmt.Sprintf("failed (do request: %s)", err.Error()) 338 - return state, fmt.Errorf("failed to send request: %w", err) 337 + return nil, fmt.Errorf("failed to send request: %w", err) 339 338 } 340 339 341 340 if resp.StatusCode != http.StatusOK { ··· 345 344 } else { 346 345 reason = resp.Status 347 346 } 348 - state := fmt.Sprintf("failed (%s)", reason) 349 - return state, fmt.Errorf("failed to get repo: %s", reason) 347 + return nil, fmt.Errorf("failed to get repo: %s", reason) 350 348 } 351 349 352 350 instrumentedReader := instrumentedReader{ ··· 356 354 357 355 defer instrumentedReader.Close() 358 356 359 - r, err := repo.ReadRepoFromCar(ctx, instrumentedReader) 357 + repo, err := repo.ReadRepoFromCar(ctx, instrumentedReader) 360 358 if err != nil { 361 - state := "failed (couldn't read repo CAR from response body)" 362 - return state, fmt.Errorf("failed to read repo from car: %w", err) 359 + return nil, fmt.Errorf("failed to parse repo from CAR file: %w", err) 360 + } 361 + return repo, nil 362 + } 363 + 364 + // BackfillRepo backfills a repo 365 + func (b *Backfiller) BackfillRepo(ctx context.Context, job Job) (string, error) { 366 + ctx, span := tracer.Start(ctx, "BackfillRepo") 367 + defer span.End() 368 + 369 + start := time.Now() 370 + 371 + repoDID := job.Repo() 372 + 373 + log := slog.With("source", "backfiller_backfill_repo", "repo", repoDID) 374 + if job.RetryCount() > 0 { 375 + log = log.With("retry_count", job.RetryCount()) 376 + } 377 + log.Info(fmt.Sprintf("processing backfill for %s", repoDID)) 378 + 379 + // first try with Relay endpoint 380 + r, err := b.fetchRepo(ctx, repoDID, job.Rev(), b.RelayHost) 381 + if err != nil { 382 + slog.Warn("repo CAR fetch from relay failed", "did", repoDID, "since", job.Rev(), "relayHost", b.RelayHost, "err", err) 383 + // fallback to direct PDS fetch 384 + ident, err := b.Directory.LookupDID(ctx, syntax.DID(repoDID)) 385 + if err != nil { 386 + return "failed resolving DID to PDS repo", fmt.Errorf("resolving DID for PDS repo fetch: %w", err) 387 + } 388 + pdsHost := ident.PDSEndpoint() 389 + if pdsHost == "" { 390 + return "DID document missing PDS endpoint", fmt.Errorf("no PDS endpoint for DID: %s", repoDID) 391 + } 392 + r, err = b.fetchRepo(ctx, 
repoDID, job.Rev(), pdsHost) 393 + if err != nil { 394 + slog.Warn("repo CAR fetch from PDS failed", "did", repoDID, "since", job.Rev(), "pdsHost", pdsHost, "err", err) 395 + return "repo CAR fetch from PDS failed", err 396 + } 397 + slog.Info("repo CAR fetch from PDS successful", "did", repoDID, "since", job.Rev(), "pdsHost", pdsHost, "err", err) 363 398 } 364 399 365 400 numRecords := 0 ··· 396 431 397 432 raw := blk.RawData() 398 433 399 - err = b.HandleCreateRecord(ctx, repoDid, rev, item.recordPath, &raw, &item.nodeCid) 434 + err = b.HandleCreateRecord(ctx, repoDID, rev, item.recordPath, &raw, &item.nodeCid) 400 435 if err != nil { 401 436 recordResults <- recordResult{recordPath: item.recordPath, err: fmt.Errorf("failed to handle create record: %w", err)} 402 437 continue
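The backfiller now takes a Relay host rather than a full checkout URL, and falls back to the account's own PDS when the Relay fetch fails. A sketch of the options side of that change; the host and filter values are illustrative, and wiring the options into NewBackfiller is unchanged from existing call sites:

package main

import (
	"fmt"

	"github.com/bluesky-social/indigo/backfill"
)

func main() {
	opts := backfill.DefaultBackfillOptions()
	// ws:// and wss:// prefixes are rewritten to http(s):// inside NewBackfiller
	opts.RelayHost = "wss://bsky.network"
	opts.NSIDFilter = "app.bsky.feed.post/"
	fmt.Printf("%+v\n", opts)
}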
+4 -1
bgs/admin.go
··· 264 264 return err 265 265 } 266 266 267 + // don't care if this errors, but we should try to disconnect something we just blocked 268 + _ = bgs.slurper.KillUpstreamConnection(host, false) 269 + 267 270 return e.JSON(200, map[string]any{ 268 271 "success": "true", 269 272 }) ··· 484 487 ctx := context.Background() 485 488 err := bgs.ResyncPDS(ctx, pds) 486 489 if err != nil { 487 - log.Errorw("failed to resync PDS", "err", err, "pds", pds.Host) 490 + log.Error("failed to resync PDS", "err", err, "pds", pds.Host) 488 491 } 489 492 }() 490 493
+172 -86
bgs/bgs.go
··· 6 6 "encoding/json" 7 7 "errors" 8 8 "fmt" 9 + "log/slog" 9 10 "net" 10 11 "net/http" 11 12 _ "net/http/pprof" 12 13 "net/url" 14 + "reflect" 13 15 "strconv" 14 16 "strings" 15 17 "sync" ··· 27 29 "github.com/bluesky-social/indigo/models" 28 30 "github.com/bluesky-social/indigo/repomgr" 29 31 "github.com/bluesky-social/indigo/xrpc" 32 + lru "github.com/hashicorp/golang-lru/v2" 30 33 "golang.org/x/sync/semaphore" 31 34 "golang.org/x/time/rate" 32 35 33 36 "github.com/gorilla/websocket" 34 37 "github.com/ipfs/go-cid" 35 38 ipld "github.com/ipfs/go-ipld-format" 36 - logging "github.com/ipfs/go-log" 37 39 "github.com/labstack/echo/v4" 38 40 "github.com/labstack/echo/v4/middleware" 39 41 promclient "github.com/prometheus/client_golang/prometheus" ··· 44 46 "gorm.io/gorm" 45 47 ) 46 48 47 - var log = logging.Logger("bgs") 48 49 var tracer = otel.Tracer("bgs") 49 50 50 51 // serverListenerBootTimeout is how long to wait for the requested server socket ··· 87 88 88 89 // Management of Compaction 89 90 compactor *Compactor 91 + 92 + // User cache 93 + userCache *lru.Cache[string, *User] 94 + 95 + // nextCrawlers gets forwarded POST /xrpc/com.atproto.sync.requestCrawl 96 + nextCrawlers []*url.URL 97 + httpClient http.Client 98 + 99 + log *slog.Logger 90 100 } 91 101 92 102 type PDSResync struct { ··· 113 123 ConcurrencyPerPDS int64 114 124 MaxQueuePerPDS int64 115 125 NumCompactionWorkers int 126 + 127 + // NextCrawlers gets forwarded POST /xrpc/com.atproto.sync.requestCrawl 128 + NextCrawlers []*url.URL 116 129 } 117 130 118 131 func DefaultBGSConfig() *BGSConfig { ··· 136 149 db.AutoMigrate(models.PDS{}) 137 150 db.AutoMigrate(models.DomainBan{}) 138 151 152 + uc, _ := lru.New[string, *User](1_000_000) 153 + 139 154 bgs := &BGS{ 140 155 Index: ix, 141 156 db: db, ··· 151 166 consumers: make(map[uint64]*SocketConsumer), 152 167 153 168 pdsResyncs: make(map[uint]*PDSResync), 169 + 170 + userCache: uc, 171 + 172 + log: slog.Default().With("system", "bgs"), 154 173 } 155 174 156 175 ix.CreateExternalUser = bgs.createExternalUser ··· 177 196 compactor.Start(bgs) 178 197 bgs.compactor = compactor 179 198 199 + bgs.nextCrawlers = config.NextCrawlers 200 + bgs.httpClient.Timeout = time.Second * 5 201 + 180 202 return bgs, nil 181 203 } 182 204 ··· 226 248 act, err := bgs.Index.GetUserOrMissing(ctx, did) 227 249 if err != nil { 228 250 w.WriteHeader(500) 229 - log.Errorf("failed to get user: %s", err) 251 + bgs.log.Error("failed to get user", "err", err) 230 252 return 231 253 } 232 254 233 255 if err := bgs.Index.Crawler.Crawl(ctx, act); err != nil { 234 256 w.WriteHeader(500) 235 - log.Errorf("failed to add user to crawler: %s", err) 257 + bgs.log.Error("failed to add user to crawler", "err", err) 236 258 return 237 259 } 238 260 }) ··· 317 339 if err2 := ctx.JSON(err.Code, map[string]any{ 318 340 "error": err.Message, 319 341 }); err2 != nil { 320 - log.Errorf("Failed to write http error: %s", err2) 342 + bgs.log.Error("Failed to write http error", "err", err2) 321 343 } 322 344 default: 323 345 sendHeader := true ··· 325 347 sendHeader = false 326 348 } 327 349 328 - log.Warnf("HANDLER ERROR: (%s) %s", ctx.Path(), err) 350 + bgs.log.Warn("HANDLER ERROR: (%s) %s", ctx.Path(), err) 329 351 330 352 if strings.HasPrefix(ctx.Path(), "/admin/") { 331 353 ctx.JSON(500, map[string]any{ ··· 418 440 419 441 func (bgs *BGS) HandleHealthCheck(c echo.Context) error { 420 442 if err := bgs.db.Exec("SELECT 1").Error; err != nil { 421 - log.Errorf("healthcheck can't connect to database: %v", err) 443 + 
bgs.log.Error("healthcheck can't connect to database", "err", err) 422 444 return c.JSON(500, HealthStatus{Status: "error", Message: "can't connect to database"}) 423 445 } else { 424 446 return c.JSON(200, HealthStatus{Status: "ok"}) ··· 521 543 522 544 // UpstreamStatus is the state of the user as reported by the upstream PDS 523 545 UpstreamStatus string `gorm:"index"` 546 + 547 + lk sync.Mutex 548 + } 549 + 550 + func (u *User) SetTakenDown(v bool) { 551 + u.lk.Lock() 552 + defer u.lk.Unlock() 553 + u.TakenDown = v 554 + } 555 + 556 + func (u *User) GetTakenDown() bool { 557 + u.lk.Lock() 558 + defer u.lk.Unlock() 559 + return u.TakenDown 560 + } 561 + 562 + func (u *User) SetTombstoned(v bool) { 563 + u.lk.Lock() 564 + defer u.lk.Unlock() 565 + u.Tombstoned = v 566 + } 567 + 568 + func (u *User) GetTombstoned() bool { 569 + u.lk.Lock() 570 + defer u.lk.Unlock() 571 + return u.Tombstoned 572 + } 573 + 574 + func (u *User) SetUpstreamStatus(v string) { 575 + u.lk.Lock() 576 + defer u.lk.Unlock() 577 + u.UpstreamStatus = v 578 + } 579 + 580 + func (u *User) GetUpstreamStatus() string { 581 + u.lk.Lock() 582 + defer u.lk.Unlock() 583 + return u.UpstreamStatus 524 584 } 525 585 526 586 type addTargetBody struct { ··· 547 607 548 608 var m = &dto.Metric{} 549 609 if err := c.EventsSent.Write(m); err != nil { 550 - log.Errorf("failed to get sent counter: %s", err) 610 + bgs.log.Error("failed to get sent counter", "err", err) 551 611 } 552 612 553 - log.Infow("consumer disconnected", 613 + bgs.log.Info("consumer disconnected", 554 614 "consumer_id", id, 555 615 "remote_addr", c.RemoteAddr, 556 616 "user_agent", c.UserAgent, ··· 602 662 } 603 663 604 664 if err := conn.WriteControl(websocket.PingMessage, []byte{}, time.Now().Add(5*time.Second)); err != nil { 605 - log.Warnf("failed to ping client: %s", err) 665 + bgs.log.Warn("failed to ping client", "err", err) 606 666 cancel() 607 667 return 608 668 } ··· 627 687 for { 628 688 _, _, err := conn.ReadMessage() 629 689 if err != nil { 630 - log.Warnf("failed to read message from client: %s", err) 690 + bgs.log.Warn("failed to read message from client", "err", err) 631 691 cancel() 632 692 return 633 693 } ··· 654 714 consumerID := bgs.registerConsumer(&consumer) 655 715 defer bgs.cleanupConsumer(consumerID) 656 716 657 - logger := log.With( 717 + logger := bgs.log.With( 658 718 "consumer_id", consumerID, 659 719 "remote_addr", consumer.RemoteAddr, 660 720 "user_agent", consumer.UserAgent, 661 721 ) 662 722 663 - logger.Infow("new consumer", "cursor", since) 723 + logger.Info("new consumer", "cursor", since) 664 724 665 725 for { 666 726 select { ··· 672 732 673 733 wc, err := conn.NextWriter(websocket.BinaryMessage) 674 734 if err != nil { 675 - logger.Errorf("failed to get next writer: %s", err) 735 + logger.Error("failed to get next writer", "err", err) 676 736 return err 677 737 } 678 738 ··· 686 746 } 687 747 688 748 if err := wc.Close(); err != nil { 689 - logger.Warnf("failed to flush-close our event write: %s", err) 749 + logger.Warn("failed to flush-close our event write", "err", err) 690 750 return nil 691 751 } 692 752 ··· 707 767 // defensive in case things change under the hood. 
708 768 registry, ok := promclient.DefaultRegisterer.(*promclient.Registry) 709 769 if !ok { 710 - log.Warnf("failed to export default prometheus registry; some metrics will be unavailable; unexpected type: %T", promclient.DefaultRegisterer) 770 + slog.Warn("failed to export default prometheus registry; some metrics will be unavailable; unexpected type", "type", reflect.TypeOf(promclient.DefaultRegisterer)) 711 771 } 712 772 exporter, err := prometheus.NewExporter(prometheus.Options{ 713 773 Registry: registry, 714 774 Namespace: "bigsky", 715 775 }) 716 776 if err != nil { 717 - log.Errorf("could not create the prometheus stats exporter: %v", err) 777 + slog.Error("could not create the prometheus stats exporter", "err", err, "system", "bgs") 718 778 } 719 779 720 780 return exporter ··· 771 831 ctx, span := tracer.Start(ctx, "lookupUserByDid") 772 832 defer span.End() 773 833 834 + cu, ok := bgs.userCache.Get(did) 835 + if ok { 836 + return cu, nil 837 + } 838 + 774 839 var u User 775 840 if err := bgs.db.Find(&u, "did = ?", did).Error; err != nil { 776 841 return nil, err ··· 779 844 if u.ID == 0 { 780 845 return nil, gorm.ErrRecordNotFound 781 846 } 847 + 848 + bgs.userCache.Add(did, &u) 782 849 783 850 return &u, nil 784 851 } ··· 822 889 case env.RepoCommit != nil: 823 890 repoCommitsReceivedCounter.WithLabelValues(host.Host).Add(1) 824 891 evt := env.RepoCommit 825 - log.Debugw("bgs got repo append event", "seq", evt.Seq, "pdsHost", host.Host, "repo", evt.Repo) 892 + bgs.log.Debug("bgs got repo append event", "seq", evt.Seq, "pdsHost", host.Host, "repo", evt.Repo) 893 + 894 + s := time.Now() 826 895 u, err := bgs.lookupUserByDid(ctx, evt.Repo) 896 + userLookupDuration.Observe(time.Since(s).Seconds()) 827 897 if err != nil { 828 898 if !errors.Is(err, gorm.ErrRecordNotFound) { 899 + repoCommitsResultCounter.WithLabelValues(host.Host, "nou").Inc() 829 900 return fmt.Errorf("looking up event user: %w", err) 830 901 } 831 902 832 903 newUsersDiscovered.Inc() 904 + start := time.Now() 833 905 subj, err := bgs.createExternalUser(ctx, evt.Repo) 906 + newUserDiscoveryDuration.Observe(time.Since(start).Seconds()) 834 907 if err != nil { 908 + repoCommitsResultCounter.WithLabelValues(host.Host, "uerr").Inc() 835 909 return fmt.Errorf("fed event create external user: %w", err) 836 910 } 837 911 ··· 840 914 u.Did = evt.Repo 841 915 } 842 916 843 - span.SetAttributes(attribute.String("upstream_status", u.UpstreamStatus)) 917 + ustatus := u.GetUpstreamStatus() 918 + span.SetAttributes(attribute.String("upstream_status", ustatus)) 844 919 845 - if u.TakenDown || u.UpstreamStatus == events.AccountStatusTakendown { 846 - span.SetAttributes(attribute.Bool("taken_down_by_relay_admin", u.TakenDown)) 847 - log.Debugw("dropping commit event from taken down user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host) 920 + if u.GetTakenDown() || ustatus == events.AccountStatusTakendown { 921 + span.SetAttributes(attribute.Bool("taken_down_by_relay_admin", u.GetTakenDown())) 922 + bgs.log.Debug("dropping commit event from taken down user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host) 923 + repoCommitsResultCounter.WithLabelValues(host.Host, "tdu").Inc() 848 924 return nil 849 925 } 850 926 851 - if u.UpstreamStatus == events.AccountStatusSuspended { 852 - log.Debugw("dropping commit event from suspended user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host) 927 + if ustatus == events.AccountStatusSuspended { 928 + bgs.log.Debug("dropping commit event from suspended user", "did", evt.Repo, 
"seq", evt.Seq, "pdsHost", host.Host) 929 + repoCommitsResultCounter.WithLabelValues(host.Host, "susu").Inc() 853 930 return nil 854 931 } 855 932 856 - if u.UpstreamStatus == events.AccountStatusDeactivated { 857 - log.Debugw("dropping commit event from deactivated user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host) 933 + if ustatus == events.AccountStatusDeactivated { 934 + bgs.log.Debug("dropping commit event from deactivated user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host) 935 + repoCommitsResultCounter.WithLabelValues(host.Host, "du").Inc() 858 936 return nil 859 937 } 860 938 861 939 if evt.Rebase { 940 + repoCommitsResultCounter.WithLabelValues(host.Host, "rebase").Inc() 862 941 return fmt.Errorf("rebase was true in event seq:%d,host:%s", evt.Seq, host.Host) 863 942 } 864 943 865 944 if host.ID != u.PDS && u.PDS != 0 { 866 - log.Warnw("received event for repo from different pds than expected", "repo", evt.Repo, "expPds", u.PDS, "gotPds", host.Host) 945 + bgs.log.Warn("received event for repo from different pds than expected", "repo", evt.Repo, "expPds", u.PDS, "gotPds", host.Host) 867 946 // Flush any cached DID documents for this user 868 947 bgs.didr.FlushCacheFor(env.RepoCommit.Repo) 869 948 870 949 subj, err := bgs.createExternalUser(ctx, evt.Repo) 871 950 if err != nil { 951 + repoCommitsResultCounter.WithLabelValues(host.Host, "uerr2").Inc() 872 952 return err 873 953 } 874 954 875 955 if subj.PDS != host.ID { 956 + repoCommitsResultCounter.WithLabelValues(host.Host, "noauth").Inc() 876 957 return fmt.Errorf("event from non-authoritative pds") 877 958 } 878 959 } 879 960 880 - if u.Tombstoned { 961 + if u.GetTombstoned() { 881 962 span.SetAttributes(attribute.Bool("tombstoned", true)) 882 963 // we've checked the authority of the users PDS, so reinstate the account 883 964 if err := bgs.db.Model(&User{}).Where("id = ?", u.ID).UpdateColumn("tombstoned", false).Error; err != nil { 965 + repoCommitsResultCounter.WithLabelValues(host.Host, "tomb").Inc() 884 966 return fmt.Errorf("failed to un-tombstone a user: %w", err) 885 967 } 968 + u.SetTombstoned(false) 886 969 887 970 ai, err := bgs.Index.LookupUser(ctx, u.ID) 888 971 if err != nil { 972 + repoCommitsResultCounter.WithLabelValues(host.Host, "nou2").Inc() 889 973 return fmt.Errorf("failed to look up user (tombstone recover): %w", err) 890 974 } 891 975 892 976 // Now a simple re-crawl should suffice to bring the user back online 977 + repoCommitsResultCounter.WithLabelValues(host.Host, "catchupt").Inc() 893 978 return bgs.Index.Crawler.AddToCatchupQueue(ctx, host, ai, evt) 894 979 } 895 980 ··· 898 983 rebasesCounter.WithLabelValues(host.Host).Add(1) 899 984 ai, err := bgs.Index.LookupUser(ctx, u.ID) 900 985 if err != nil { 986 + repoCommitsResultCounter.WithLabelValues(host.Host, "nou3").Inc() 901 987 return fmt.Errorf("failed to look up user (slow path): %w", err) 902 988 } 903 989 ··· 909 995 // processor coming off of the pds stream, we should investigate 910 996 // whether or not we even need this 'slow path' logic, as it makes 911 997 // accounting for which events have been processed much harder 998 + repoCommitsResultCounter.WithLabelValues(host.Host, "catchup").Inc() 912 999 return bgs.Index.Crawler.AddToCatchupQueue(ctx, host, ai, evt) 913 1000 } 914 1001 915 1002 if err := bgs.repoman.HandleExternalUserEvent(ctx, host.ID, u.ID, u.Did, evt.Since, evt.Rev, evt.Blocks, evt.Ops); err != nil { 916 - log.Warnw("failed handling event", "err", err, "pdsHost", host.Host, "seq", evt.Seq, "repo", u.Did, 
"prev", stringLink(evt.Prev), "commit", evt.Commit.String()) 917 1003 918 1004 if errors.Is(err, carstore.ErrRepoBaseMismatch) || ipld.IsNotFound(err) { 919 1005 ai, lerr := bgs.Index.LookupUser(ctx, u.ID) 920 1006 if lerr != nil { 1007 + log.Warn("failed handling event, no user", "err", err, "pdsHost", host.Host, "seq", evt.Seq, "repo", u.Did, "prev", stringLink(evt.Prev), "commit", evt.Commit.String()) 1008 + repoCommitsResultCounter.WithLabelValues(host.Host, "nou4").Inc() 921 1009 return fmt.Errorf("failed to look up user %s (%d) (err case: %s): %w", u.Did, u.ID, err, lerr) 922 1010 } 923 1011 924 1012 span.SetAttributes(attribute.Bool("catchup_queue", true)) 925 1013 1014 + log.Info("failed handling event, catchup", "err", err, "pdsHost", host.Host, "seq", evt.Seq, "repo", u.Did, "prev", stringLink(evt.Prev), "commit", evt.Commit.String()) 1015 + repoCommitsResultCounter.WithLabelValues(host.Host, "catchup2").Inc() 926 1016 return bgs.Index.Crawler.AddToCatchupQueue(ctx, host, ai, evt) 927 1017 } 928 1018 1019 + log.Warn("failed handling event", "err", err, "pdsHost", host.Host, "seq", evt.Seq, "repo", u.Did, "prev", stringLink(evt.Prev), "commit", evt.Commit.String()) 1020 + repoCommitsResultCounter.WithLabelValues(host.Host, "err").Inc() 929 1021 return fmt.Errorf("handle user event failed: %w", err) 930 1022 } 931 1023 1024 + repoCommitsResultCounter.WithLabelValues(host.Host, "ok").Inc() 932 1025 return nil 933 1026 case env.RepoHandle != nil: 934 - log.Infow("bgs got repo handle event", "did", env.RepoHandle.Did, "handle", env.RepoHandle.Handle) 1027 + bgs.log.Info("bgs got repo handle event", "did", env.RepoHandle.Did, "handle", env.RepoHandle.Handle) 935 1028 // Flush any cached DID documents for this user 936 1029 bgs.didr.FlushCacheFor(env.RepoHandle.Did) 937 1030 ··· 942 1035 } 943 1036 944 1037 if act.Handle.String != env.RepoHandle.Handle { 945 - log.Warnw("handle update did not update handle to asserted value", "did", env.RepoHandle.Did, "expected", env.RepoHandle.Handle, "actual", act.Handle) 1038 + bgs.log.Warn("handle update did not update handle to asserted value", "did", env.RepoHandle.Did, "expected", env.RepoHandle.Handle, "actual", act.Handle) 946 1039 } 947 1040 948 1041 // TODO: Update the ReposHandle event type to include "verified" or something ··· 956 1049 }, 957 1050 }) 958 1051 if err != nil { 959 - log.Errorw("failed to broadcast RepoHandle event", "error", err, "did", env.RepoHandle.Did, "handle", env.RepoHandle.Handle) 1052 + bgs.log.Error("failed to broadcast RepoHandle event", "error", err, "did", env.RepoHandle.Did, "handle", env.RepoHandle.Handle) 960 1053 return fmt.Errorf("failed to broadcast RepoHandle event: %w", err) 961 1054 } 962 1055 963 1056 return nil 964 1057 case env.RepoIdentity != nil: 965 - log.Infow("bgs got identity event", "did", env.RepoIdentity.Did) 1058 + bgs.log.Info("bgs got identity event", "did", env.RepoIdentity.Did) 966 1059 // Flush any cached DID documents for this user 967 1060 bgs.didr.FlushCacheFor(env.RepoIdentity.Did) 968 1061 ··· 982 1075 }, 983 1076 }) 984 1077 if err != nil { 985 - log.Errorw("failed to broadcast Identity event", "error", err, "did", env.RepoIdentity.Did) 1078 + bgs.log.Error("failed to broadcast Identity event", "error", err, "did", env.RepoIdentity.Did) 986 1079 return fmt.Errorf("failed to broadcast Identity event: %w", err) 987 1080 } 988 1081 ··· 998 1091 span.SetAttributes(attribute.String("repo_status", *env.RepoAccount.Status)) 999 1092 } 1000 1093 1001 - log.Infow("bgs got account event", 
"did", env.RepoAccount.Did) 1094 + bgs.log.Info("bgs got account event", "did", env.RepoAccount.Did) 1002 1095 // Flush any cached DID documents for this user 1003 1096 bgs.didr.FlushCacheFor(env.RepoAccount.Did) 1004 1097 ··· 1012 1105 // Check if the PDS is still authoritative 1013 1106 // if not we don't want to be propagating this account event 1014 1107 if ai.PDS != host.ID { 1015 - log.Errorw("account event from non-authoritative pds", 1108 + bgs.log.Error("account event from non-authoritative pds", 1016 1109 "seq", env.RepoAccount.Seq, 1017 1110 "did", env.RepoAccount.Did, 1018 1111 "event_from", host.Host, ··· 1041 1134 return fmt.Errorf("failed to look up user by did: %w", err) 1042 1135 } 1043 1136 1044 - if u.TakenDown { 1137 + if u.GetTakenDown() { 1045 1138 shouldBeActive = false 1046 1139 status = &events.AccountStatusTakendown 1047 1140 } ··· 1057 1150 }, 1058 1151 }) 1059 1152 if err != nil { 1060 - log.Errorw("failed to broadcast Account event", "error", err, "did", env.RepoAccount.Did) 1153 + bgs.log.Error("failed to broadcast Account event", "error", err, "did", env.RepoAccount.Did) 1061 1154 return fmt.Errorf("failed to broadcast Account event: %w", err) 1062 1155 } 1063 1156 ··· 1095 1188 }).Error; err != nil { 1096 1189 return err 1097 1190 } 1191 + u.SetTombstoned(true) 1098 1192 1099 1193 if err := bgs.db.Model(&models.ActorInfo{}).Where("uid = ?", u.ID).UpdateColumns(map[string]any{ 1100 1194 "handle": nil, ··· 1105 1199 // delete data from carstore 1106 1200 if err := bgs.repoman.TakeDownRepo(ctx, u.ID); err != nil { 1107 1201 // don't let a failure here prevent us from propagating this event 1108 - log.Errorf("failed to delete user data from carstore: %s", err) 1202 + bgs.log.Error("failed to delete user data from carstore", "err", err) 1109 1203 } 1110 1204 1111 1205 return bgs.events.AddEvent(ctx, &events.XRPCStreamEvent{ ··· 1120 1214 1121 1215 externalUserCreationAttempts.Inc() 1122 1216 1123 - log.Debugf("create external user: %s", did) 1217 + s.log.Debug("create external user", "did", did) 1124 1218 doc, err := s.didr.GetDocument(ctx, did) 1125 1219 if err != nil { 1126 1220 return nil, fmt.Errorf("could not locate DID document for followed user (%s): %w", did, err) ··· 1143 1237 // TODO: the PDS's DID should also be in the service, we could use that to look up? 
1144 1238 var peering models.PDS 1145 1239 if err := s.db.Find(&peering, "host = ?", durl.Host).Error; err != nil { 1146 - log.Error("failed to find pds", durl.Host) 1240 + s.log.Error("failed to find pds", "host", durl.Host) 1147 1241 return nil, err 1148 1242 } 1149 1243 ··· 1216 1310 defer func() { 1217 1311 if !successfullyCreated { 1218 1312 if err := s.db.Model(&models.PDS{}).Where("id = ?", peering.ID).Update("repo_count", gorm.Expr("repo_count - 1")).Error; err != nil { 1219 - log.Errorf("failed to decrement repo count for pds: %s", err) 1313 + s.log.Error("failed to decrement repo count for pds", "err", err) 1220 1314 } 1221 1315 } 1222 1316 }() ··· 1230 1324 return nil, err 1231 1325 } 1232 1326 1233 - log.Debugw("creating external user", "did", did, "handle", hurl.Host, "pds", peering.ID) 1327 + s.log.Debug("creating external user", "did", did, "handle", hurl.Host, "pds", peering.ID) 1234 1328 1235 1329 handle := hurl.Host 1236 1330 ··· 1238 1332 1239 1333 resdid, err := s.hr.ResolveHandleToDid(ctx, handle) 1240 1334 if err != nil { 1241 - log.Errorf("failed to resolve users claimed handle (%q) on pds: %s", handle, err) 1335 + s.log.Error("failed to resolve users claimed handle on pds", "handle", handle, "err", err) 1242 1336 validHandle = false 1243 1337 } 1244 1338 1245 1339 if resdid != did { 1246 - log.Errorf("claimed handle did not match servers response (%s != %s)", resdid, did) 1340 + s.log.Error("claimed handle did not match servers response", "resdid", resdid, "did", did) 1247 1341 validHandle = false 1248 1342 } 1249 1343 ··· 1252 1346 1253 1347 exu, err := s.Index.LookupUserByDid(ctx, did) 1254 1348 if err == nil { 1255 - log.Debugw("lost the race to create a new user", "did", did, "handle", handle, "existing_hand", exu.Handle) 1349 + s.log.Debug("lost the race to create a new user", "did", did, "handle", handle, "existing_hand", exu.Handle) 1256 1350 if exu.PDS != peering.ID { 1257 1351 // User is now on a different PDS, update 1258 1352 if err := s.db.Model(User{}).Where("id = ?", exu.Uid).Update("pds", peering.ID).Error; err != nil { ··· 1323 1417 if err := s.db.Create(&u).Error; err != nil { 1324 1418 return nil, fmt.Errorf("failed to create user after handle conflict: %w", err) 1325 1419 } 1420 + 1421 + s.userCache.Remove(did) 1326 1422 } else { 1327 1423 return nil, fmt.Errorf("failed to create other pds user: %w", err) 1328 1424 } ··· 1370 1466 if err := bgs.db.Model(User{}).Where("id = ?", u.ID).Update("upstream_status", events.AccountStatusActive).Error; err != nil { 1371 1467 return fmt.Errorf("failed to set user active status: %w", err) 1372 1468 } 1469 + u.SetUpstreamStatus(events.AccountStatusActive) 1373 1470 case events.AccountStatusDeactivated: 1374 1471 if err := bgs.db.Model(User{}).Where("id = ?", u.ID).Update("upstream_status", events.AccountStatusDeactivated).Error; err != nil { 1375 1472 return fmt.Errorf("failed to set user deactivation status: %w", err) 1376 1473 } 1474 + u.SetUpstreamStatus(events.AccountStatusDeactivated) 1377 1475 case events.AccountStatusSuspended: 1378 1476 if err := bgs.db.Model(User{}).Where("id = ?", u.ID).Update("upstream_status", events.AccountStatusSuspended).Error; err != nil { 1379 1477 return fmt.Errorf("failed to set user suspension status: %w", err) 1380 1478 } 1479 + u.SetUpstreamStatus(events.AccountStatusSuspended) 1381 1480 case events.AccountStatusTakendown: 1382 1481 if err := bgs.db.Model(User{}).Where("id = ?", u.ID).Update("upstream_status", events.AccountStatusTakendown).Error; err != nil { 1383 1482 
return fmt.Errorf("failed to set user taken down status: %w", err) 1384 1483 } 1484 + u.SetUpstreamStatus(events.AccountStatusTakendown) 1385 1485 1386 1486 if err := bgs.db.Model(&models.ActorInfo{}).Where("uid = ?", u.ID).UpdateColumns(map[string]any{ 1387 1487 "handle": nil, ··· 1396 1496 }).Error; err != nil { 1397 1497 return err 1398 1498 } 1499 + u.SetUpstreamStatus(events.AccountStatusDeleted) 1399 1500 1400 1501 if err := bgs.db.Model(&models.ActorInfo{}).Where("uid = ?", u.ID).UpdateColumns(map[string]any{ 1401 1502 "handle": nil, ··· 1406 1507 // delete data from carstore 1407 1508 if err := bgs.repoman.TakeDownRepo(ctx, u.ID); err != nil { 1408 1509 // don't let a failure here prevent us from propagating this event 1409 - log.Errorf("failed to delete user data from carstore: %s", err) 1510 + bgs.log.Error("failed to delete user data from carstore", "err", err) 1410 1511 } 1411 1512 } 1412 1513 ··· 1422 1523 if err := bgs.db.Model(User{}).Where("id = ?", u.ID).Update("taken_down", true).Error; err != nil { 1423 1524 return err 1424 1525 } 1526 + u.SetTakenDown(true) 1425 1527 1426 1528 if err := bgs.repoman.TakeDownRepo(ctx, u.ID); err != nil { 1427 1529 return err ··· 1443 1545 if err := bgs.db.Model(User{}).Where("id = ?", u.ID).Update("taken_down", false).Error; err != nil { 1444 1546 return err 1445 1547 } 1548 + u.SetTakenDown(false) 1446 1549 1447 1550 return nil 1448 1551 } ··· 1511 1614 func (bgs *BGS) ResyncPDS(ctx context.Context, pds models.PDS) error { 1512 1615 ctx, span := tracer.Start(ctx, "ResyncPDS") 1513 1616 defer span.End() 1514 - log := log.With("pds", pds.Host, "source", "resync_pds") 1617 + log := bgs.log.With("pds", pds.Host, "source", "resync_pds") 1515 1618 resync, found := bgs.LoadOrStoreResync(pds) 1516 1619 if found { 1517 1620 return fmt.Errorf("resync already in progress") ··· 1543 1646 for { 1544 1647 pages++ 1545 1648 if pages%10 == 0 { 1546 - log.Warnw("fetching PDS page during resync", "pages", pages, "total_repos", len(repos)) 1649 + log.Warn("fetching PDS page during resync", "pages", pages, "total_repos", len(repos)) 1547 1650 resync.NumRepoPages = pages 1548 1651 resync.NumRepos = len(repos) 1549 1652 bgs.UpdateResync(resync) 1550 1653 } 1551 1654 if err := limiter.Wait(ctx); err != nil { 1552 - log.Errorw("failed to wait for rate limiter", "error", err) 1655 + log.Error("failed to wait for rate limiter", "error", err) 1553 1656 return fmt.Errorf("failed to wait for rate limiter: %w", err) 1554 1657 } 1555 1658 repoList, err := comatproto.SyncListRepos(ctx, &xrpcc, cursor, limit) 1556 1659 if err != nil { 1557 - log.Errorw("failed to list repos", "error", err) 1660 + log.Error("failed to list repos", "error", err) 1558 1661 return fmt.Errorf("failed to list repos: %w", err) 1559 1662 } 1560 1663 ··· 1576 1679 1577 1680 repolistDone := time.Now() 1578 1681 1579 - log.Warnw("listed all repos, checking roots", "num_repos", len(repos), "took", repolistDone.Sub(start)) 1682 + log.Warn("listed all repos, checking roots", "num_repos", len(repos), "took", repolistDone.Sub(start)) 1580 1683 resync = bgs.SetResyncStatus(pds.ID, "checking revs") 1581 1684 1582 - // Create a buffered channel for collecting results 1583 - results := make(chan revCheckResult, len(repos)) 1685 + // run loop over repos with some concurrency 1584 1686 sem := semaphore.NewWeighted(40) 1585 1687 1586 1688 // Check repo revs against our local copy and enqueue crawls for any that are out of date 1587 - for _, r := range repos { 1689 + for i, r := range repos { 1588 1690 if err 
:= sem.Acquire(ctx, 1); err != nil { 1589 - log.Errorw("failed to acquire semaphore", "error", err) 1590 - results <- revCheckResult{err: err} 1691 + log.Error("failed to acquire semaphore", "error", err) 1591 1692 continue 1592 1693 } 1593 1694 go func(r comatproto.SyncListRepos_Repo) { 1594 1695 defer sem.Release(1) 1595 - log := log.With("did", r.Did, "remote_rev", r.Rev) 1696 + log := bgs.log.With("did", r.Did, "remote_rev", r.Rev) 1596 1697 // Fetches the user if we have it, otherwise automatically enqueues it for crawling 1597 1698 ai, err := bgs.Index.GetUserOrMissing(ctx, r.Did) 1598 1699 if err != nil { 1599 - log.Errorw("failed to get user while resyncing PDS, we can't recrawl it", "error", err) 1600 - results <- revCheckResult{err: err} 1700 + log.Error("failed to get user while resyncing PDS, we can't recrawl it", "error", err) 1601 1701 return 1602 1702 } 1603 1703 1604 1704 rev, err := bgs.repoman.GetRepoRev(ctx, ai.Uid) 1605 1705 if err != nil { 1606 - log.Warnw("recrawling because we failed to get the local repo root", "err", err, "uid", ai.Uid) 1607 - results <- revCheckResult{ai: ai} 1706 + log.Warn("recrawling because we failed to get the local repo root", "err", err, "uid", ai.Uid) 1707 + err := bgs.Index.Crawler.Crawl(ctx, ai) 1708 + if err != nil { 1709 + log.Error("failed to enqueue crawl for repo during resync", "error", err, "uid", ai.Uid, "did", ai.Did) 1710 + } 1608 1711 return 1609 1712 } 1610 1713 1611 1714 if rev == "" || rev < r.Rev { 1612 - log.Warnw("recrawling because the repo rev from the PDS is newer than our local repo rev", "local_rev", rev) 1613 - results <- revCheckResult{ai: ai} 1715 + log.Warn("recrawling because the repo rev from the PDS is newer than our local repo rev", "local_rev", rev) 1716 + err := bgs.Index.Crawler.Crawl(ctx, ai) 1717 + if err != nil { 1718 + log.Error("failed to enqueue crawl for repo during resync", "error", err, "uid", ai.Uid, "did", ai.Did) 1719 + } 1614 1720 return 1615 1721 } 1616 - 1617 - results <- revCheckResult{} 1618 1722 }(r) 1619 - } 1620 - 1621 - var numReposToResync int 1622 - for i := 0; i < len(repos); i++ { 1623 - res := <-results 1624 - if res.err != nil { 1625 - log.Errorw("failed to process repo during resync", "error", res.err) 1626 - 1627 - } 1628 - if res.ai != nil { 1629 - numReposToResync++ 1630 - err := bgs.Index.Crawler.Crawl(ctx, res.ai) 1631 - if err != nil { 1632 - log.Errorw("failed to enqueue crawl for repo during resync", "error", err, "uid", res.ai.Uid, "did", res.ai.Did) 1633 - } 1634 - } 1635 1723 if i%100 == 0 { 1636 1724 if i%10_000 == 0 { 1637 - log.Warnw("checked revs during resync", "num_repos_checked", i, "num_repos_to_crawl", numReposToResync, "took", time.Now().Sub(resync.StatusChangedAt)) 1725 + log.Warn("checked revs during resync", "num_repos_checked", i, "num_repos_to_crawl", -1, "took", time.Now().Sub(resync.StatusChangedAt)) 1638 1726 } 1639 1727 resync.NumReposChecked = i 1640 - resync.NumReposToResync = numReposToResync 1641 1728 bgs.UpdateResync(resync) 1642 1729 } 1643 1730 } 1644 1731 1645 1732 resync.NumReposChecked = len(repos) 1646 - resync.NumReposToResync = numReposToResync 1647 1733 bgs.UpdateResync(resync) 1648 1734 1649 - log.Warnw("enqueued all crawls, exiting resync", "took", time.Now().Sub(start), "num_repos_to_crawl", numReposToResync) 1735 + bgs.log.Warn("enqueued all crawls, exiting resync", "took", time.Now().Sub(start), "num_repos_to_crawl", -1) 1650 1736 1651 1737 return nil 1652 1738 }
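The resync loop above drops the buffered results channel and instead enqueues crawls directly from semaphore-bounded goroutines. A minimal sketch of that bounded-concurrency pattern, with a hypothetical checkRepo helper standing in for the per-repo rev comparison and an illustrative repo list:

    package main

    import (
    	"context"
    	"fmt"

    	"golang.org/x/sync/semaphore"
    )

    // checkRepo stands in for the per-repo rev check; hypothetical helper.
    func checkRepo(ctx context.Context, did string) error {
    	fmt.Println("checked", did)
    	return nil
    }

    func main() {
    	ctx := context.Background()
    	repos := []string{"did:plc:aaa", "did:plc:bbb", "did:plc:ccc"}

    	// Cap concurrency at 40 in-flight checks, as in semaphore.NewWeighted(40) above.
    	const maxInFlight = 40
    	sem := semaphore.NewWeighted(maxInFlight)
    	for _, did := range repos {
    		if err := sem.Acquire(ctx, 1); err != nil {
    			break // context cancelled
    		}
    		go func(did string) {
    			defer sem.Release(1)
    			_ = checkRepo(ctx, did)
    		}(did)
    	}

    	// Block until every launched goroutine has released its slot.
    	_ = sem.Acquire(ctx, maxInFlight)
    }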
+7 -7
bgs/compactor.go
··· 210 210 } 211 211 if c.requeueInterval > 0 { 212 212 go func() { 213 - log.Infow("starting compactor requeue routine", 213 + log.Info("starting compactor requeue routine", 214 214 "interval", c.requeueInterval, 215 215 "limit", c.requeueLimit, 216 216 "shardCount", c.requeueShardCount, ··· 226 226 ctx := context.Background() 227 227 ctx, span := otel.Tracer("compactor").Start(ctx, "RequeueRoutine") 228 228 if err := c.EnqueueAllRepos(ctx, bgs, c.requeueLimit, c.requeueShardCount, c.requeueFast); err != nil { 229 - log.Errorw("failed to enqueue all repos", "err", err) 229 + log.Error("failed to enqueue all repos", "err", err) 230 230 } 231 231 span.End() 232 232 } ··· 262 262 time.Sleep(time.Second * 5) 263 263 continue 264 264 } 265 - log.Errorw("failed to compact repo", 265 + log.Error("failed to compact repo", 266 266 "err", err, 267 267 "uid", state.latestUID, 268 268 "repo", state.latestDID, ··· 273 273 // Pause for a bit to avoid spamming failed compactions 274 274 time.Sleep(time.Millisecond * 100) 275 275 } else { 276 - log.Infow("compacted repo", 276 + log.Info("compacted repo", 277 277 "uid", state.latestUID, 278 278 "repo", state.latestDID, 279 279 "status", state.status, ··· 349 349 return state, nil 350 350 } 351 351 352 - func (c *Compactor) EnqueueRepo(ctx context.Context, user User, fast bool) { 352 + func (c *Compactor) EnqueueRepo(ctx context.Context, user *User, fast bool) { 353 353 ctx, span := otel.Tracer("compactor").Start(ctx, "EnqueueRepo") 354 354 defer span.End() 355 - log.Infow("enqueueing compaction for repo", "repo", user.Did, "uid", user.ID, "fast", fast) 355 + log.Info("enqueueing compaction for repo", "repo", user.Did, "uid", user.ID, "fast", fast) 356 356 c.q.Append(user.ID, fast) 357 357 } 358 358 ··· 396 396 c.q.Append(r.Usr, fast) 397 397 } 398 398 399 - log.Infow("done enqueueing all repos", "repos_enqueued", len(repos)) 399 + log.Info("done enqueueing all repos", "repos_enqueued", len(repos)) 400 400 401 401 return nil 402 402 }
+36 -23
bgs/fedmgr.go
··· 4 4 "context" 5 5 "errors" 6 6 "fmt" 7 + "log/slog" 7 8 "math/rand" 8 9 "strings" 9 10 "sync" ··· 21 22 pq "github.com/lib/pq" 22 23 "gorm.io/gorm" 23 24 ) 25 + 26 + var log = slog.Default().With("system", "bgs") 24 27 25 28 type IndexCallback func(context.Context, *models.PDS, *events.XRPCStreamEvent) error 26 29 ··· 129 132 var errs []error 130 133 if errs = s.flushCursors(ctx); len(errs) > 0 { 131 134 for _, err := range errs { 132 - log.Errorf("failed to flush cursors on shutdown: %s", err) 135 + log.Error("failed to flush cursors on shutdown", "err", err) 133 136 } 134 137 } 135 138 log.Info("done flushing PDS cursors on shutdown") ··· 142 145 defer span.End() 143 146 if errs := s.flushCursors(ctx); len(errs) > 0 { 144 147 for _, err := range errs { 145 - log.Errorf("failed to flush cursors: %s", err) 148 + log.Error("failed to flush cursors", "err", err) 146 149 } 147 150 } 148 151 log.Debug("done flushing PDS cursors") ··· 210 213 errs := <-s.shutdownResult 211 214 if len(errs) > 0 { 212 215 for _, err := range errs { 213 - log.Errorf("shutdown error: %s", err) 216 + log.Error("shutdown error", "err", err) 214 217 } 215 218 } 216 219 log.Info("slurper shutdown complete") ··· 468 471 protocol = "wss" 469 472 } 470 473 474 + // Special case `.host.bsky.network` PDSs to rewind cursor by 200 events to smooth over unclean shutdowns 475 + if strings.HasSuffix(host.Host, ".host.bsky.network") && host.Cursor > 200 { 476 + host.Cursor -= 200 477 + } 478 + 471 479 cursor := host.Cursor 480 + 481 + connectedInbound.Inc() 482 + defer connectedInbound.Dec() 483 + // TODO:? maybe keep a gauge of 'in retry backoff' sources? 472 484 473 485 var backoff int 474 486 for { ··· 481 493 url := fmt.Sprintf("%s://%s/xrpc/com.atproto.sync.subscribeRepos?cursor=%d", protocol, host.Host, cursor) 482 494 con, res, err := d.DialContext(ctx, url, nil) 483 495 if err != nil { 484 - log.Warnw("dialing failed", "pdsHost", host.Host, "err", err, "backoff", backoff) 496 + log.Warn("dialing failed", "pdsHost", host.Host, "err", err, "backoff", backoff) 485 497 time.Sleep(sleepForBackoff(backoff)) 486 498 backoff++ 487 499 488 500 if backoff > 15 { 489 - log.Warnw("pds does not appear to be online, disabling for now", "pdsHost", host.Host) 501 + log.Warn("pds does not appear to be online, disabling for now", "pdsHost", host.Host) 490 502 if err := s.db.Model(&models.PDS{}).Where("id = ?", host.ID).Update("registered", false).Error; err != nil { 491 - log.Errorf("failed to unregister failing pds: %w", err) 503 + log.Error("failed to unregister failing pds", "err", err) 492 504 } 493 505 494 506 return ··· 497 509 continue 498 510 } 499 511 500 - log.Info("event subscription response code: ", res.StatusCode) 512 + log.Info("event subscription response", "code", res.StatusCode) 501 513 502 514 curCursor := cursor 503 515 if err := s.handleConnection(ctx, host, con, &cursor, sub); err != nil { 504 516 if errors.Is(err, ErrTimeoutShutdown) { 505 - log.Infof("shutting down pds subscription to %s, no activity after %s", host.Host, EventsTimeout) 517 + log.Info("shutting down pds subscription after timeout", "host", host.Host, "time", EventsTimeout) 506 518 return 507 519 } 508 - log.Warnf("connection to %q failed: %s", host.Host, err) 520 + log.Warn("connection to failed", "host", host.Host, "err", err) 509 521 } 510 522 511 523 if cursor > curCursor { ··· 536 548 537 549 rsc := &events.RepoStreamCallbacks{ 538 550 RepoCommit: func(evt *comatproto.SyncSubscribeRepos_Commit) error { 539 - log.Debugw("got remote repo 
event", "pdsHost", host.Host, "repo", evt.Repo, "seq", evt.Seq) 551 + log.Debug("got remote repo event", "pdsHost", host.Host, "repo", evt.Repo, "seq", evt.Seq) 540 552 if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{ 541 553 RepoCommit: evt, 542 554 }); err != nil { 543 - log.Errorf("failed handling event from %q (%d): %s", host.Host, evt.Seq, err) 555 + log.Error("failed handling event", "host", host.Host, "seq", evt.Seq, "err", err) 544 556 } 545 557 *lastCursor = evt.Seq 546 558 ··· 551 563 return nil 552 564 }, 553 565 RepoHandle: func(evt *comatproto.SyncSubscribeRepos_Handle) error { 554 - log.Infow("got remote handle update event", "pdsHost", host.Host, "did", evt.Did, "handle", evt.Handle) 566 + log.Info("got remote handle update event", "pdsHost", host.Host, "did", evt.Did, "handle", evt.Handle) 555 567 if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{ 556 568 RepoHandle: evt, 557 569 }); err != nil { 558 - log.Errorf("failed handling event from %q (%d): %s", host.Host, evt.Seq, err) 570 + log.Error("failed handling event", "host", host.Host, "seq", evt.Seq, "err", err) 559 571 } 560 572 *lastCursor = evt.Seq 561 573 ··· 566 578 return nil 567 579 }, 568 580 RepoMigrate: func(evt *comatproto.SyncSubscribeRepos_Migrate) error { 569 - log.Infow("got remote repo migrate event", "pdsHost", host.Host, "did", evt.Did, "migrateTo", evt.MigrateTo) 581 + log.Info("got remote repo migrate event", "pdsHost", host.Host, "did", evt.Did, "migrateTo", evt.MigrateTo) 570 582 if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{ 571 583 RepoMigrate: evt, 572 584 }); err != nil { 573 - log.Errorf("failed handling event from %q (%d): %s", host.Host, evt.Seq, err) 585 + log.Error("failed handling event", "host", host.Host, "seq", evt.Seq, "err", err) 574 586 } 575 587 *lastCursor = evt.Seq 576 588 ··· 581 593 return nil 582 594 }, 583 595 RepoTombstone: func(evt *comatproto.SyncSubscribeRepos_Tombstone) error { 584 - log.Infow("got remote repo tombstone event", "pdsHost", host.Host, "did", evt.Did) 596 + log.Info("got remote repo tombstone event", "pdsHost", host.Host, "did", evt.Did) 585 597 if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{ 586 598 RepoTombstone: evt, 587 599 }); err != nil { 588 - log.Errorf("failed handling event from %q (%d): %s", host.Host, evt.Seq, err) 600 + log.Error("failed handling event", "host", host.Host, "seq", evt.Seq, "err", err) 589 601 } 590 602 *lastCursor = evt.Seq 591 603 ··· 596 608 return nil 597 609 }, 598 610 RepoInfo: func(info *comatproto.SyncSubscribeRepos_Info) error { 599 - log.Infow("info event", "name", info.Name, "message", info.Message, "pdsHost", host.Host) 611 + log.Info("info event", "name", info.Name, "message", info.Message, "pdsHost", host.Host) 600 612 return nil 601 613 }, 602 614 RepoIdentity: func(ident *comatproto.SyncSubscribeRepos_Identity) error { 603 - log.Infow("identity event", "did", ident.Did) 615 + log.Info("identity event", "did", ident.Did) 604 616 if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{ 605 617 RepoIdentity: ident, 606 618 }); err != nil { 607 - log.Errorf("failed handling event from %q (%d): %s", host.Host, ident.Seq, err) 619 + log.Error("failed handling event", "host", host.Host, "seq", ident.Seq, "err", err) 608 620 } 609 621 *lastCursor = ident.Seq 610 622 ··· 615 627 return nil 616 628 }, 617 629 RepoAccount: func(acct *comatproto.SyncSubscribeRepos_Account) error { 618 - log.Infow("account event", "did", acct.Did, "status", acct.Status) 630 + 
log.Info("account event", "did", acct.Did, "status", acct.Status) 619 631 if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{ 620 632 RepoAccount: acct, 621 633 }); err != nil { 622 - log.Errorf("failed handling event from %q (%d): %s", host.Host, acct.Seq, err) 634 + log.Error("failed handling event", "host", host.Host, "seq", acct.Seq, "err", err) 623 635 } 624 636 *lastCursor = acct.Seq 625 637 ··· 662 674 con.RemoteAddr().String(), 663 675 instrumentedRSC.EventHandler, 664 676 ) 665 - return events.HandleRepoStream(ctx, con, pool) 677 + return events.HandleRepoStream(ctx, con, pool, nil) 666 678 } 667 679 668 680 func (s *Slurper) updateCursor(sub *activeSub, curs int64) error { ··· 733 745 return fmt.Errorf("killing connection %q: %w", host, ErrNoActiveConnection) 734 746 } 735 747 ac.cancel() 748 + // cleanup in the run thread subscribeWithRedialer() will delete(s.active, host) 736 749 737 750 if block { 738 751 if err := s.db.Model(models.PDS{}).Where("id = ?", ac.pds.ID).UpdateColumn("blocked", true).Error; err != nil {
+52 -24
bgs/handlers.go
··· 3 3 import ( 4 4 "bytes" 5 5 "context" 6 + "encoding/json" 6 7 "errors" 7 8 "fmt" 8 9 "io" ··· 30 31 if errors.Is(err, gorm.ErrRecordNotFound) { 31 32 return nil, echo.NewHTTPError(http.StatusNotFound, "user not found") 32 33 } 33 - log.Errorw("failed to lookup user", "err", err, "did", did) 34 + log.Error("failed to lookup user", "err", err, "did", did) 34 35 return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to lookup user") 35 36 } 36 37 37 - if u.Tombstoned { 38 + if u.GetTombstoned() { 38 39 return nil, fmt.Errorf("account was deleted") 39 40 } 40 41 41 - if u.TakenDown { 42 + if u.GetTakenDown() { 42 43 return nil, fmt.Errorf("account was taken down by the Relay") 43 44 } 44 45 45 - if u.UpstreamStatus == events.AccountStatusTakendown { 46 + ustatus := u.GetUpstreamStatus() 47 + if ustatus == events.AccountStatusTakendown { 46 48 return nil, fmt.Errorf("account was taken down by its PDS") 47 49 } 48 50 49 - if u.UpstreamStatus == events.AccountStatusDeactivated { 51 + if ustatus == events.AccountStatusDeactivated { 50 52 return nil, fmt.Errorf("account is temporarily deactivated") 51 53 } 52 54 53 - if u.UpstreamStatus == events.AccountStatusSuspended { 55 + if ustatus == events.AccountStatusSuspended { 54 56 return nil, fmt.Errorf("account is suspended by its PDS") 55 57 } 56 58 ··· 59 61 if errors.Is(err, mst.ErrNotFound) { 60 62 return nil, echo.NewHTTPError(http.StatusNotFound, "record not found in repo") 61 63 } 62 - log.Errorw("failed to get record from repo", "err", err, "did", did, "collection", collection, "rkey", rkey) 64 + log.Error("failed to get record from repo", "err", err, "did", did, "collection", collection, "rkey", rkey) 63 65 return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to get record from repo") 64 66 } 65 67 ··· 87 89 if errors.Is(err, gorm.ErrRecordNotFound) { 88 90 return nil, echo.NewHTTPError(http.StatusNotFound, "user not found") 89 91 } 90 - log.Errorw("failed to lookup user", "err", err, "did", did) 92 + log.Error("failed to lookup user", "err", err, "did", did) 91 93 return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to lookup user") 92 94 } 93 95 94 - if u.Tombstoned { 96 + if u.GetTombstoned() { 95 97 return nil, fmt.Errorf("account was deleted") 96 98 } 97 99 98 - if u.TakenDown { 100 + if u.GetTakenDown() { 99 101 return nil, fmt.Errorf("account was taken down by the Relay") 100 102 } 101 103 102 - if u.UpstreamStatus == events.AccountStatusTakendown { 104 + ustatus := u.GetUpstreamStatus() 105 + if ustatus == events.AccountStatusTakendown { 103 106 return nil, fmt.Errorf("account was taken down by its PDS") 104 107 } 105 108 106 - if u.UpstreamStatus == events.AccountStatusDeactivated { 109 + if ustatus == events.AccountStatusDeactivated { 107 110 return nil, fmt.Errorf("account is temporarily deactivated") 108 111 } 109 112 110 - if u.UpstreamStatus == events.AccountStatusSuspended { 113 + if ustatus == events.AccountStatusSuspended { 111 114 return nil, fmt.Errorf("account is suspended by its PDS") 112 115 } 113 116 114 117 // TODO: stream the response 115 118 buf := new(bytes.Buffer) 116 119 if err := s.repoman.ReadRepo(ctx, u.ID, since, buf); err != nil { 117 - log.Errorw("failed to read repo into buffer", "err", err, "did", did) 120 + log.Error("failed to read repo into buffer", "err", err, "did", did) 118 121 return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to read repo into buffer") 119 122 } 120 123 ··· 167 170 return 
echo.NewHTTPError(http.StatusUnauthorized, "domain is banned") 168 171 } 169 172 170 - log.Warnf("TODO: better host validation for crawl requests") 173 + log.Warn("TODO: better host validation for crawl requests") 171 174 172 175 clientHost := fmt.Sprintf("%s://%s", u.Scheme, host) 173 176 ··· 185 188 // Maybe we could do something with this response later 186 189 _ = desc 187 190 191 + if len(s.nextCrawlers) != 0 { 192 + blob, err := json.Marshal(body) 193 + if err != nil { 194 + log.Warn("could not forward requestCrawl, json err", "err", err) 195 + } else { 196 + go func(bodyBlob []byte) { 197 + for _, rpu := range s.nextCrawlers { 198 + pu := rpu.JoinPath("/xrpc/com.atproto.sync.requestCrawl") 199 + response, err := s.httpClient.Post(pu.String(), "application/json", bytes.NewReader(bodyBlob)) 200 + if response != nil && response.Body != nil { 201 + response.Body.Close() 202 + } 203 + if err != nil || response == nil { 204 + log.Warn("requestCrawl forward failed", "host", rpu, "err", err) 205 + } else if response.StatusCode != http.StatusOK { 206 + log.Warn("requestCrawl forward failed", "host", rpu, "status", response.Status) 207 + } else { 208 + log.Info("requestCrawl forward successful", "host", rpu) 209 + } 210 + } 211 + }(blob) 212 + } 213 + } 214 + 188 215 return s.slurper.SubscribeToPds(ctx, host, true, false) 189 216 } 190 217 ··· 204 231 if err == gorm.ErrRecordNotFound { 205 232 return &comatprototypes.SyncListRepos_Output{}, nil 206 233 } 207 - log.Errorw("failed to query users", "err", err) 234 + log.Error("failed to query users", "err", err) 208 235 return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to query users") 209 236 } 210 237 ··· 225 252 226 253 root, err := s.repoman.GetRepoRoot(ctx, user.ID) 227 254 if err != nil { 228 - log.Errorw("failed to get repo root", "err", err, "did", user.Did) 255 + log.Error("failed to get repo root", "err", err, "did", user.Did) 229 256 return nil, echo.NewHTTPError(http.StatusInternalServerError, fmt.Sprintf("failed to get repo root for (%s): %v", user.Did, err.Error())) 230 257 } 231 258 ··· 253 280 return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to lookup user") 254 281 } 255 282 256 - if u.Tombstoned { 283 + if u.GetTombstoned() { 257 284 return nil, fmt.Errorf("account was deleted") 258 285 } 259 286 260 - if u.TakenDown { 287 + if u.GetTakenDown() { 261 288 return nil, fmt.Errorf("account was taken down by the Relay") 262 289 } 263 290 264 - if u.UpstreamStatus == events.AccountStatusTakendown { 291 + ustatus := u.GetUpstreamStatus() 292 + if ustatus == events.AccountStatusTakendown { 265 293 return nil, fmt.Errorf("account was taken down by its PDS") 266 294 } 267 295 268 - if u.UpstreamStatus == events.AccountStatusDeactivated { 296 + if ustatus == events.AccountStatusDeactivated { 269 297 return nil, fmt.Errorf("account is temporarily deactivated") 270 298 } 271 299 272 - if u.UpstreamStatus == events.AccountStatusSuspended { 300 + if ustatus == events.AccountStatusSuspended { 273 301 return nil, fmt.Errorf("account is suspended by its PDS") 274 302 } 275 303 276 304 root, err := s.repoman.GetRepoRoot(ctx, u.ID) 277 305 if err != nil { 278 - log.Errorw("failed to get repo root", "err", err, "did", u.Did) 306 + log.Error("failed to get repo root", "err", err, "did", u.Did) 279 307 return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to get repo root") 280 308 } 281 309 282 310 rev, err := s.repoman.GetRepoRev(ctx, u.ID) 283 311 if err != nil { 284 - log.Errorw("failed to 
get repo rev", "err", err, "did", u.Did) 312 + log.Error("failed to get repo rev", "err", err, "did", u.Did) 285 313 return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to get repo rev") 286 314 } 287 315
+22
bgs/metrics.go
··· 27 27 Help: "The total number of events received", 28 28 }, []string{"pds"}) 29 29 30 + var repoCommitsResultCounter = promauto.NewCounterVec(prometheus.CounterOpts{ 31 + Name: "repo_commits_result_counter", 32 + Help: "The results of commit events received", 33 + }, []string{"pds", "status"}) 34 + 30 35 var rebasesCounter = promauto.NewCounterVec(prometheus.CounterOpts{ 31 36 Name: "event_rebases", 32 37 Help: "The total number of rebase events received", ··· 40 45 var externalUserCreationAttempts = promauto.NewCounter(prometheus.CounterOpts{ 41 46 Name: "bgs_external_user_creation_attempts", 42 47 Help: "The total number of external users created", 48 + }) 49 + 50 + var connectedInbound = promauto.NewGauge(prometheus.GaugeOpts{ 51 + Name: "bgs_connected_inbound", 52 + Help: "Number of inbound firehoses we are consuming", 43 53 }) 44 54 45 55 var compactionDuration = promauto.NewHistogram(prometheus.HistogramOpts{ ··· 80 90 Help: "A histogram of response sizes for requests.", 81 91 Buckets: prometheus.ExponentialBuckets(100, 10, 8), 82 92 }, []string{"code", "method", "path"}) 93 + 94 + var userLookupDuration = promauto.NewHistogram(prometheus.HistogramOpts{ 95 + Name: "relay_user_lookup_duration", 96 + Help: "A histogram of user lookup latencies", 97 + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), 98 + }) 99 + 100 + var newUserDiscoveryDuration = promauto.NewHistogram(prometheus.HistogramOpts{ 101 + Name: "relay_new_user_discovery_duration", 102 + Help: "A histogram of new user discovery latencies", 103 + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), 104 + }) 83 105 84 106 // MetricsMiddleware defines handler function for metrics middleware 85 107 func MetricsMiddleware(next echo.HandlerFunc) echo.HandlerFunc {
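The new gauge and histograms follow the usual client_golang pattern: Inc/Dec around a connection's lifetime (as connectedInbound is used in fedmgr.go above) and Observe with a value in seconds for latencies. A minimal sketch with illustrative metric names:

    package main

    import (
    	"time"

    	"github.com/prometheus/client_golang/prometheus"
    	"github.com/prometheus/client_golang/prometheus/promauto"
    )

    var exampleConnected = promauto.NewGauge(prometheus.GaugeOpts{
    	Name: "example_connected_inbound",
    	Help: "Number of inbound connections (illustrative)",
    })

    var exampleLookupDuration = promauto.NewHistogram(prometheus.HistogramOpts{
    	Name:    "example_lookup_duration",
    	Help:    "Lookup latency in seconds (illustrative)",
    	Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
    })

    func lookup() {
    	start := time.Now()
    	defer func() { exampleLookupDuration.Observe(time.Since(start).Seconds()) }()
    	time.Sleep(5 * time.Millisecond) // stand-in for the real lookup
    }

    func main() {
    	exampleConnected.Inc()
    	defer exampleConnected.Dec()
    	lookup()
    }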
+82 -26
carstore/bs.go
··· 6 6 "context" 7 7 "fmt" 8 8 "io" 9 + "log/slog" 9 10 "os" 10 11 "path/filepath" 11 12 "sort" ··· 24 25 cbor "github.com/ipfs/go-ipld-cbor" 25 26 ipld "github.com/ipfs/go-ipld-format" 26 27 "github.com/ipfs/go-libipfs/blocks" 27 - logging "github.com/ipfs/go-log" 28 28 car "github.com/ipld/go-car" 29 29 carutil "github.com/ipld/go-car/util" 30 30 cbg "github.com/whyrusleeping/cbor-gen" ··· 41 41 var blockGetTotalCounterUsrskip = blockGetTotalCounter.WithLabelValues("true", "miss") 42 42 var blockGetTotalCounterCached = blockGetTotalCounter.WithLabelValues("false", "hit") 43 43 var blockGetTotalCounterNormal = blockGetTotalCounter.WithLabelValues("false", "miss") 44 - 45 - var log = logging.Logger("carstore") 46 44 47 45 const MaxSliceLength = 2 << 20 48 46 ··· 62 60 } 63 61 64 62 type FileCarStore struct { 65 - meta *CarStoreGormMeta 66 - rootDir string 63 + meta *CarStoreGormMeta 64 + rootDirs []string 67 65 68 66 lscLk sync.Mutex 69 67 lastShardCache map[models.Uid]*CarShard 68 + 69 + log *slog.Logger 70 70 } 71 71 72 - func NewCarStore(meta *gorm.DB, root string) (CarStore, error) { 73 - if _, err := os.Stat(root); err != nil { 74 - if !os.IsNotExist(err) { 75 - return nil, err 76 - } 72 + func NewCarStore(meta *gorm.DB, roots []string) (CarStore, error) { 73 + for _, root := range roots { 74 + if _, err := os.Stat(root); err != nil { 75 + if !os.IsNotExist(err) { 76 + return nil, err 77 + } 77 78 78 - if err := os.Mkdir(root, 0775); err != nil { 79 - return nil, err 79 + if err := os.Mkdir(root, 0775); err != nil { 80 + return nil, err 81 + } 80 82 } 81 83 } 82 84 if err := meta.AutoMigrate(&CarShard{}, &blockRef{}); err != nil { ··· 88 90 89 91 return &FileCarStore{ 90 92 meta: &CarStoreGormMeta{meta: meta}, 91 - rootDir: root, 93 + rootDirs: roots, 92 94 lastShardCache: make(map[models.Uid]*CarShard), 95 + log: slog.Default().With("system", "carstore"), 93 96 }, nil 94 97 } 95 98 96 99 type userView struct { 97 - cs *FileCarStore 100 + cs CarStore 98 101 user models.Uid 99 102 100 103 cache map[cid.Cid]blockformat.Block ··· 108 111 } 109 112 110 113 func (uv *userView) Has(ctx context.Context, k cid.Cid) (bool, error) { 111 - return uv.cs.meta.HasUidCid(ctx, uv.user, k) 114 + _, have := uv.cache[k] 115 + if have { 116 + return have, nil 117 + } 118 + 119 + fcd, ok := uv.cs.(*FileCarStore) 120 + if !ok { 121 + return false, nil 122 + } 123 + 124 + return fcd.meta.HasUidCid(ctx, uv.user, k) 112 125 } 113 126 114 127 var CacheHits int64 115 128 var CacheMiss int64 116 129 117 130 func (uv *userView) Get(ctx context.Context, k cid.Cid) (blockformat.Block, error) { 131 + 118 132 if !k.Defined() { 119 133 return nil, fmt.Errorf("attempted to 'get' undefined cid") 120 134 } ··· 129 143 } 130 144 atomic.AddInt64(&CacheMiss, 1) 131 145 132 - path, offset, user, err := uv.cs.meta.LookupBlockRef(ctx, k) 146 + fcd, ok := uv.cs.(*FileCarStore) 147 + if !ok { 148 + return nil, ipld.ErrNotFound{Cid: k} 149 + } 150 + 151 + path, offset, user, err := fcd.meta.LookupBlockRef(ctx, k) 133 152 if err != nil { 134 153 return nil, err 135 154 } ··· 269 288 baseCid cid.Cid 270 289 seq int 271 290 readonly bool 272 - cs *FileCarStore 291 + cs CarStore 273 292 lastRev string 274 293 } 275 294 ··· 541 560 func fnameForShard(user models.Uid, seq int) string { 542 561 return fmt.Sprintf("sh-%d-%d", user, seq) 543 562 } 563 + 564 + func (cs *FileCarStore) dirForUser(user models.Uid) string { 565 + return cs.rootDirs[int(user)%len(cs.rootDirs)] 566 + } 567 + 544 568 func (cs *FileCarStore) openNewShardFile(ctx 
context.Context, user models.Uid, seq int) (*os.File, string, error) { 545 569 // TODO: some overwrite protections 546 - fname := filepath.Join(cs.rootDir, fnameForShard(user, seq)) 570 + fname := filepath.Join(cs.dirForUser(user), fnameForShard(user, seq)) 547 571 fi, err := os.Create(fname) 548 572 if err != nil { 549 573 return nil, "", err ··· 557 581 defer span.End() 558 582 559 583 // TODO: some overwrite protections 560 - fname := filepath.Join(cs.rootDir, fnameForShard(user, seq)) 584 + fname := filepath.Join(cs.dirForUser(user), fnameForShard(user, seq)) 561 585 if err := os.WriteFile(fname, data, 0664); err != nil { 562 586 return "", err 563 587 } ··· 579 603 return nil, fmt.Errorf("cannot write to readonly deltaSession") 580 604 } 581 605 582 - return ds.cs.writeNewShard(ctx, root, rev, ds.user, ds.seq, ds.blks, ds.rmcids) 606 + switch ocs := ds.cs.(type) { 607 + case *FileCarStore: 608 + return ocs.writeNewShard(ctx, root, rev, ds.user, ds.seq, ds.blks, ds.rmcids) 609 + case *NonArchivalCarstore: 610 + slice, err := blocksToCar(ctx, root, rev, ds.blks) 611 + if err != nil { 612 + return nil, err 613 + } 614 + return slice, ocs.updateLastCommit(ctx, ds.user, rev, root) 615 + default: 616 + return nil, fmt.Errorf("unsupported carstore type") 617 + } 583 618 } 584 619 585 620 func WriteCarHeader(w io.Writer, root cid.Cid) (int64, error) { ··· 600 635 return hnw, nil 601 636 } 602 637 638 + func blocksToCar(ctx context.Context, root cid.Cid, rev string, blks map[cid.Cid]blockformat.Block) ([]byte, error) { 639 + buf := new(bytes.Buffer) 640 + _, err := WriteCarHeader(buf, root) 641 + if err != nil { 642 + return nil, fmt.Errorf("failed to write car header: %w", err) 643 + } 644 + 645 + for k, blk := range blks { 646 + _, err := LdWrite(buf, k.Bytes(), blk.RawData()) 647 + if err != nil { 648 + return nil, fmt.Errorf("failed to write block: %w", err) 649 + } 650 + } 651 + 652 + return buf.Bytes(), nil 653 + } 654 + 603 655 func (cs *FileCarStore) writeNewShard(ctx context.Context, root cid.Cid, rev string, user models.Uid, seq int, blks map[cid.Cid]blockformat.Block, rmcids map[cid.Cid]bool) ([]byte, error) { 604 656 605 657 buf := new(bytes.Buffer) ··· 638 690 offset += nw 639 691 } 640 692 693 + start := time.Now() 641 694 path, err := cs.writeNewShardFile(ctx, user, seq, buf.Bytes()) 642 695 if err != nil { 643 696 return nil, fmt.Errorf("failed to write shard file: %w", err) 644 697 } 698 + writeShardFileDuration.Observe(time.Since(start).Seconds()) 645 699 646 700 shard := CarShard{ 647 701 Root: models.DbCID{CID: root}, ··· 652 706 Rev: rev, 653 707 } 654 708 709 + start = time.Now() 655 710 if err := cs.putShard(ctx, &shard, brefs, rmcids, false); err != nil { 656 711 return nil, err 657 712 } 713 + writeShardMetadataDuration.Observe(time.Since(start).Seconds()) 658 714 659 715 return buf.Bytes(), nil 660 716 } ··· 872 928 if !os.IsNotExist(err) { 873 929 return err 874 930 } 875 - log.Warnw("shard file we tried to delete did not exist", "shard", sh.ID, "path", sh.Path) 931 + cs.log.Warn("shard file we tried to delete did not exist", "shard", sh.ID, "path", sh.Path) 876 932 } 877 933 } 878 934 ··· 982 1038 // TODO: some overwrite protections 983 1039 // NOTE CreateTemp is used for creating a non-colliding file, but we keep it and don't delete it so don't think of it as "temporary". 
984 1040 // This creates "sh-%d-%d%s" with some random stuff in the last position 985 - fi, err := os.CreateTemp(cs.rootDir, fnameForShard(user, seq)) 1041 + fi, err := os.CreateTemp(cs.dirForUser(user), fnameForShard(user, seq)) 986 1042 if err != nil { 987 1043 return nil, "", err 988 1044 } ··· 1023 1079 st, err := os.Stat(sh.Path) 1024 1080 if err != nil { 1025 1081 if os.IsNotExist(err) { 1026 - log.Warnw("missing shard, return size of zero", "path", sh.Path, "shard", sh.ID) 1082 + slog.Warn("missing shard, return size of zero", "path", sh.Path, "shard", sh.ID, "system", "carstore") 1027 1083 return 0, nil 1028 1084 } 1029 1085 return 0, fmt.Errorf("stat %q: %w", sh.Path, err) ··· 1144 1200 // still around but we're doing that anyways since compaction isn't a 1145 1201 // perfect process 1146 1202 1147 - log.Debugw("repo has dirty dupes", "count", len(dupes), "uid", user, "staleRefs", len(staleRefs), "blockRefs", len(brefs)) 1203 + cs.log.Debug("repo has dirty dupes", "count", len(dupes), "uid", user, "staleRefs", len(staleRefs), "blockRefs", len(brefs)) 1148 1204 1149 1205 //return nil, fmt.Errorf("WIP: not currently handling this case") 1150 1206 } ··· 1339 1395 }); err != nil { 1340 1396 // If we ever fail to iterate a shard file because its 1341 1397 // corrupted, just log an error and skip the shard 1342 - log.Errorw("iterating blocks in shard", "shard", s.ID, "err", err, "uid", user) 1398 + cs.log.Error("iterating blocks in shard", "shard", s.ID, "err", err, "uid", user) 1343 1399 } 1344 1400 } 1345 1401 ··· 1357 1413 _ = fi.Close() 1358 1414 1359 1415 if err2 := os.Remove(fi.Name()); err2 != nil { 1360 - log.Errorf("failed to remove shard file (%s) after failed db transaction: %w", fi.Name(), err2) 1416 + cs.log.Error("failed to remove shard file after failed db transaction", "path", fi.Name(), "err", err2) 1361 1417 } 1362 1418 1363 1419 return err
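NewCarStore now takes a slice of shard directories and picks a directory for each new shard file by uid modulo; an existing single-directory deployment keeps working by passing a one-element slice, and previously written shards are still found via the path recorded in their shard metadata. A sketch of opening the store this way, with an illustrative sqlite metadata DB and directory names:

    package main

    import (
    	"log"

    	"github.com/bluesky-social/indigo/carstore"
    	"gorm.io/driver/sqlite"
    	"gorm.io/gorm"
    )

    func main() {
    	// Metadata DB holding shard and block refs; sqlite here only for illustration.
    	db, err := gorm.Open(sqlite.Open("carstore.sqlite"), &gorm.Config{})
    	if err != nil {
    		log.Fatal(err)
    	}

    	// New shard files are spread across these directories by uid modulo; an
    	// existing single-directory deployment just passes a one-element slice.
    	cs, err := carstore.NewCarStore(db, []string{"./shards-disk1", "./shards-disk2"})
    	if err != nil {
    		log.Fatal(err)
    	}
    	_ = cs
    }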
+18
carstore/metrics.go
··· 1 + package carstore 2 + 3 + import ( 4 + "github.com/prometheus/client_golang/prometheus" 5 + "github.com/prometheus/client_golang/prometheus/promauto" 6 + ) 7 + 8 + var writeShardFileDuration = promauto.NewHistogram(prometheus.HistogramOpts{ 9 + Name: "carstore_write_shard_file_duration", 10 + Help: "Duration of writing shard file to disk", 11 + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), 12 + }) 13 + 14 + var writeShardMetadataDuration = promauto.NewHistogram(prometheus.HistogramOpts{ 15 + Name: "carstore_write_shard_metadata_duration", 16 + Help: "Duration of writing shard metadata to DB", 17 + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), 18 + })
+254
carstore/nonarchive.go
··· 1 + package carstore 2 + 3 + import ( 4 + "bytes" 5 + "context" 6 + "fmt" 7 + "io" 8 + "log/slog" 9 + "sync" 10 + 11 + "github.com/bluesky-social/indigo/models" 12 + blockformat "github.com/ipfs/go-block-format" 13 + "github.com/ipfs/go-cid" 14 + "github.com/ipfs/go-datastore" 15 + blockstore "github.com/ipfs/go-ipfs-blockstore" 16 + car "github.com/ipld/go-car" 17 + "go.opentelemetry.io/otel" 18 + "gorm.io/gorm" 19 + "gorm.io/gorm/clause" 20 + ) 21 + 22 + type NonArchivalCarstore struct { 23 + db *gorm.DB 24 + 25 + lk sync.Mutex 26 + lastCommitCache map[models.Uid]*commitRefInfo 27 + 28 + log *slog.Logger 29 + } 30 + 31 + func NewNonArchivalCarstore(db *gorm.DB) (*NonArchivalCarstore, error) { 32 + if err := db.AutoMigrate(&commitRefInfo{}); err != nil { 33 + return nil, err 34 + } 35 + 36 + return &NonArchivalCarstore{ 37 + db: db, 38 + lastCommitCache: make(map[models.Uid]*commitRefInfo), 39 + log: slog.Default().With("system", "carstorena"), 40 + }, nil 41 + } 42 + 43 + type commitRefInfo struct { 44 + ID uint `gorm:"primarykey"` 45 + Uid models.Uid `gorm:"uniqueIndex"` 46 + Rev string 47 + Root models.DbCID 48 + } 49 + 50 + func (cs *NonArchivalCarstore) checkLastShardCache(user models.Uid) *commitRefInfo { 51 + cs.lk.Lock() 52 + defer cs.lk.Unlock() 53 + 54 + ls, ok := cs.lastCommitCache[user] 55 + if ok { 56 + return ls 57 + } 58 + 59 + return nil 60 + } 61 + 62 + func (cs *NonArchivalCarstore) removeLastShardCache(user models.Uid) { 63 + cs.lk.Lock() 64 + defer cs.lk.Unlock() 65 + 66 + delete(cs.lastCommitCache, user) 67 + } 68 + 69 + func (cs *NonArchivalCarstore) putLastShardCache(ls *commitRefInfo) { 70 + cs.lk.Lock() 71 + defer cs.lk.Unlock() 72 + 73 + cs.lastCommitCache[ls.Uid] = ls 74 + } 75 + 76 + func (cs *NonArchivalCarstore) loadCommitRefInfo(ctx context.Context, user models.Uid) (*commitRefInfo, error) { 77 + var out commitRefInfo 78 + if err := cs.db.Find(&out, "uid = ?", user).Error; err != nil { 79 + return nil, err 80 + } 81 + 82 + return &out, nil 83 + } 84 + 85 + func (cs *NonArchivalCarstore) getCommitRefInfo(ctx context.Context, user models.Uid) (*commitRefInfo, error) { 86 + ctx, span := otel.Tracer("carstore").Start(ctx, "getCommitRefInfo") 87 + defer span.End() 88 + 89 + maybeLs := cs.checkLastShardCache(user) 90 + if maybeLs != nil { 91 + return maybeLs, nil 92 + } 93 + 94 + lastShard, err := cs.loadCommitRefInfo(ctx, user) 95 + if err != nil { 96 + return nil, err 97 + } 98 + 99 + cs.putLastShardCache(lastShard) 100 + return lastShard, nil 101 + } 102 + 103 + func (cs *NonArchivalCarstore) updateLastCommit(ctx context.Context, uid models.Uid, rev string, cid cid.Cid) error { 104 + cri := &commitRefInfo{ 105 + Uid: uid, 106 + Rev: rev, 107 + Root: models.DbCID{CID: cid}, 108 + } 109 + 110 + if err := cs.db.Clauses(clause.OnConflict{ 111 + Columns: []clause.Column{{Name: "uid"}}, 112 + UpdateAll: true, 113 + }).Create(cri).Error; err != nil { 114 + return fmt.Errorf("update or set last commit info: %w", err) 115 + } 116 + 117 + cs.putLastShardCache(cri) 118 + 119 + return nil 120 + } 121 + 122 + func (cs *NonArchivalCarstore) NewDeltaSession(ctx context.Context, user models.Uid, since *string) (*DeltaSession, error) { 123 + ctx, span := otel.Tracer("carstore").Start(ctx, "NewSession") 124 + defer span.End() 125 + 126 + // TODO: ensure that we don't write updates on top of the wrong head 127 + // this needs to be a compare and swap type operation 128 + lastShard, err := cs.getCommitRefInfo(ctx, user) 129 + if err != nil { 130 + return nil, err 131 + } 132 + 
133 + if since != nil && *since != lastShard.Rev { 134 + cs.log.Warn("revision mismatch", "commitSince", since, "lastRev", lastShard.Rev, "err", ErrRepoBaseMismatch) 135 + } 136 + 137 + return &DeltaSession{ 138 + fresh: blockstore.NewBlockstore(datastore.NewMapDatastore()), 139 + blks: make(map[cid.Cid]blockformat.Block), 140 + base: &userView{ 141 + user: user, 142 + cs: cs, 143 + prefetch: true, 144 + cache: make(map[cid.Cid]blockformat.Block), 145 + }, 146 + user: user, 147 + baseCid: lastShard.Root.CID, 148 + cs: cs, 149 + seq: 0, 150 + lastRev: lastShard.Rev, 151 + }, nil 152 + } 153 + 154 + func (cs *NonArchivalCarstore) ReadOnlySession(user models.Uid) (*DeltaSession, error) { 155 + return &DeltaSession{ 156 + base: &userView{ 157 + user: user, 158 + cs: cs, 159 + prefetch: false, 160 + cache: make(map[cid.Cid]blockformat.Block), 161 + }, 162 + readonly: true, 163 + user: user, 164 + cs: cs, 165 + }, nil 166 + } 167 + 168 + // TODO: incremental is only ever called true, remove the param 169 + func (cs *NonArchivalCarstore) ReadUserCar(ctx context.Context, user models.Uid, sinceRev string, incremental bool, w io.Writer) error { 170 + return fmt.Errorf("not supported in non-archival mode") 171 + } 172 + 173 + func (cs *NonArchivalCarstore) ImportSlice(ctx context.Context, uid models.Uid, since *string, carslice []byte) (cid.Cid, *DeltaSession, error) { 174 + ctx, span := otel.Tracer("carstore").Start(ctx, "ImportSlice") 175 + defer span.End() 176 + 177 + carr, err := car.NewCarReader(bytes.NewReader(carslice)) 178 + if err != nil { 179 + return cid.Undef, nil, err 180 + } 181 + 182 + if len(carr.Header.Roots) != 1 { 183 + return cid.Undef, nil, fmt.Errorf("invalid car file, header must have a single root (has %d)", len(carr.Header.Roots)) 184 + } 185 + 186 + ds, err := cs.NewDeltaSession(ctx, uid, since) 187 + if err != nil { 188 + return cid.Undef, nil, fmt.Errorf("new delta session failed: %w", err) 189 + } 190 + 191 + var cids []cid.Cid 192 + for { 193 + blk, err := carr.Next() 194 + if err != nil { 195 + if err == io.EOF { 196 + break 197 + } 198 + return cid.Undef, nil, err 199 + } 200 + 201 + cids = append(cids, blk.Cid()) 202 + 203 + if err := ds.Put(ctx, blk); err != nil { 204 + return cid.Undef, nil, err 205 + } 206 + } 207 + 208 + return carr.Header.Roots[0], ds, nil 209 + } 210 + 211 + func (cs *NonArchivalCarstore) GetUserRepoHead(ctx context.Context, user models.Uid) (cid.Cid, error) { 212 + lastShard, err := cs.getCommitRefInfo(ctx, user) 213 + if err != nil { 214 + return cid.Undef, err 215 + } 216 + if lastShard.ID == 0 { 217 + return cid.Undef, nil 218 + } 219 + 220 + return lastShard.Root.CID, nil 221 + } 222 + 223 + func (cs *NonArchivalCarstore) GetUserRepoRev(ctx context.Context, user models.Uid) (string, error) { 224 + lastShard, err := cs.getCommitRefInfo(ctx, user) 225 + if err != nil { 226 + return "", err 227 + } 228 + if lastShard.ID == 0 { 229 + return "", nil 230 + } 231 + 232 + return lastShard.Rev, nil 233 + } 234 + 235 + func (cs *NonArchivalCarstore) Stat(ctx context.Context, usr models.Uid) ([]UserStat, error) { 236 + return nil, nil 237 + } 238 + 239 + func (cs *NonArchivalCarstore) WipeUserData(ctx context.Context, user models.Uid) error { 240 + if err := cs.db.Raw("DELETE from commit_ref_infos WHERE uid = ?", user).Error; err != nil { 241 + return err 242 + } 243 + 244 + cs.removeLastShardCache(user) 245 + return nil 246 + } 247 + 248 + func (cs *NonArchivalCarstore) GetCompactionTargets(ctx context.Context, shardCount int) ([]CompactionTarget, 
error) { 249 + return nil, fmt.Errorf("compaction not supported in non-archival mode") 250 + } 251 + 252 + func (cs *NonArchivalCarstore) CompactUserShards(ctx context.Context, user models.Uid, skipBigShards bool) (*CompactionStats, error) { 253 + return nil, fmt.Errorf("compaction not supported in non-archival mode") 254 + }
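The non-archival store exposes the same surface as the file-backed store, minus full CAR reads and compaction, so a deployment can choose between them at startup (bigsky wires this to a --non-archival flag below). A sketch of that selection, assuming both concrete types satisfy the carstore.CarStore interface and with illustrative names:

    // Sketch of choosing a carstore implementation at startup; package and
    // function names here are illustrative.
    package relaysetup

    import (
    	"github.com/bluesky-social/indigo/carstore"
    	"gorm.io/gorm"
    )

    func openCarstore(metaDB *gorm.DB, shardDirs []string, nonArchival bool) (carstore.CarStore, error) {
    	if nonArchival {
    		// Keeps only the latest commit ref per repo; ReadUserCar and
    		// compaction return "not supported" errors.
    		return carstore.NewNonArchivalCarstore(metaDB)
    	}
    	// Archival store: CAR shard files on disk plus block refs in the metadata DB.
    	return carstore.NewCarStore(metaDB, shardDirs)
    }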
+8 -3
carstore/repo_test.go
··· 30 30 return nil, nil, err 31 31 } 32 32 33 - sharddir := filepath.Join(tempdir, "shards") 34 - if err := os.MkdirAll(sharddir, 0775); err != nil { 33 + sharddir1 := filepath.Join(tempdir, "shards1") 34 + if err := os.MkdirAll(sharddir1, 0775); err != nil { 35 + return nil, nil, err 36 + } 37 + 38 + sharddir2 := filepath.Join(tempdir, "shards2") 39 + if err := os.MkdirAll(sharddir2, 0775); err != nil { 35 40 return nil, nil, err 36 41 } 37 42 ··· 45 50 return nil, nil, err 46 51 } 47 52 48 - cs, err := NewCarStore(db, sharddir) 53 + cs, err := NewCarStore(db, []string{sharddir1, sharddir2}) 49 54 if err != nil { 50 55 return nil, nil, err 51 56 }
+10 -9
cmd/astrolabe/handlers.go
··· 6 6 "net/http" 7 7 "strings" 8 8 9 + "github.com/bluesky-social/indigo/api/agnostic" 9 10 comatproto "github.com/bluesky-social/indigo/api/atproto" 10 11 _ "github.com/bluesky-social/indigo/api/bsky" 11 12 "github.com/bluesky-social/indigo/atproto/data" ··· 64 65 65 66 atid, err := syntax.ParseAtIdentifier(c.Param("atid")) 66 67 if err != nil { 67 - return echo.NewHTTPError(404, fmt.Sprintf("failed to parse handle or DID")) 68 + return echo.NewHTTPError(404, "failed to parse handle or DID") 68 69 } 69 70 70 71 ident, err := srv.dir.Lookup(ctx, *atid) ··· 96 97 97 98 atid, err := syntax.ParseAtIdentifier(c.Param("atid")) 98 99 if err != nil { 99 - return echo.NewHTTPError(400, fmt.Sprintf("failed to parse handle or DID")) 100 + return echo.NewHTTPError(400, "failed to parse handle or DID") 100 101 } 101 102 102 103 ident, err := srv.dir.Lookup(ctx, *atid) ··· 133 134 134 135 atid, err := syntax.ParseAtIdentifier(c.Param("atid")) 135 136 if err != nil { 136 - return echo.NewHTTPError(400, fmt.Sprintf("failed to parse handle or DID")) 137 + return echo.NewHTTPError(400, "failed to parse handle or DID") 137 138 } 138 139 139 140 collection, err := syntax.ParseNSID(c.Param("collection")) 140 141 if err != nil { 141 - return echo.NewHTTPError(400, fmt.Sprintf("failed to parse collection NSID")) 142 + return echo.NewHTTPError(400, "failed to parse collection NSID") 142 143 } 143 144 144 145 ident, err := srv.dir.Lookup(ctx, *atid) ··· 161 162 162 163 cursor := c.QueryParam("cursor") 163 164 // collection string, cursor string, limit int64, repo string, reverse bool, rkeyEnd string, rkeyStart string 164 - resp, err := RepoListRecords(ctx, &xrpcc, collection.String(), cursor, 100, ident.DID.String(), false, "", "") 165 + resp, err := agnostic.RepoListRecords(ctx, &xrpcc, collection.String(), cursor, 100, ident.DID.String(), false, "", "") 165 166 if err != nil { 166 167 return err 167 168 } ··· 191 192 192 193 atid, err := syntax.ParseAtIdentifier(c.Param("atid")) 193 194 if err != nil { 194 - return echo.NewHTTPError(400, fmt.Sprintf("failed to parse handle or DID")) 195 + return echo.NewHTTPError(400, "failed to parse handle or DID") 195 196 } 196 197 197 198 collection, err := syntax.ParseNSID(c.Param("collection")) 198 199 if err != nil { 199 - return echo.NewHTTPError(400, fmt.Sprintf("failed to parse collection NSID")) 200 + return echo.NewHTTPError(400, "failed to parse collection NSID") 200 201 } 201 202 202 203 rkey, err := syntax.ParseRecordKey(c.Param("rkey")) 203 204 if err != nil { 204 - return echo.NewHTTPError(400, fmt.Sprintf("failed to parse record key")) 205 + return echo.NewHTTPError(400, "failed to parse record key") 205 206 } 206 207 207 208 ident, err := srv.dir.Lookup(ctx, *atid) ··· 218 219 xrpcc := xrpc.Client{ 219 220 Host: ident.PDSEndpoint(), 220 221 } 221 - resp, err := RepoGetRecord(ctx, &xrpcc, "", collection.String(), ident.DID.String(), rkey.String()) 222 + resp, err := agnostic.RepoGetRecord(ctx, &xrpcc, "", collection.String(), ident.DID.String(), rkey.String()) 222 223 if err != nil { 223 224 return echo.NewHTTPError(400, fmt.Sprintf("failed to load record: %s", err)) 224 225 }
-42
cmd/astrolabe/repogetRecord.go
··· 1 - // Copied from indigo:api/atproto/repolistRecords.go 2 - 3 - package main 4 - 5 - // schema: com.atproto.repo.getRecord 6 - 7 - import ( 8 - "context" 9 - "encoding/json" 10 - 11 - "github.com/bluesky-social/indigo/xrpc" 12 - ) 13 - 14 - // RepoGetRecord_Output is the output of a com.atproto.repo.getRecord call. 15 - type RepoGetRecord_Output struct { 16 - Cid *string `json:"cid,omitempty" cborgen:"cid,omitempty"` 17 - Uri string `json:"uri" cborgen:"uri"` 18 - // NOTE: changed from lex decoder to json.RawMessage 19 - Value *json.RawMessage `json:"value" cborgen:"value"` 20 - } 21 - 22 - // RepoGetRecord calls the XRPC method "com.atproto.repo.getRecord". 23 - // 24 - // cid: The CID of the version of the record. If not specified, then return the most recent version. 25 - // collection: The NSID of the record collection. 26 - // repo: The handle or DID of the repo. 27 - // rkey: The Record Key. 28 - func RepoGetRecord(ctx context.Context, c *xrpc.Client, cid string, collection string, repo string, rkey string) (*RepoGetRecord_Output, error) { 29 - var out RepoGetRecord_Output 30 - 31 - params := map[string]interface{}{ 32 - "cid": cid, 33 - "collection": collection, 34 - "repo": repo, 35 - "rkey": rkey, 36 - } 37 - if err := c.Do(ctx, xrpc.Query, "", "com.atproto.repo.getRecord", params, nil, &out); err != nil { 38 - return nil, err 39 - } 40 - 41 - return &out, nil 42 - }
-53
cmd/astrolabe/repolistRecords.go
··· 1 - // Copied from indigo:api/atproto/repolistRecords.go 2 - 3 - package main 4 - 5 - // schema: com.atproto.repo.listRecords 6 - 7 - import ( 8 - "context" 9 - "encoding/json" 10 - 11 - "github.com/bluesky-social/indigo/xrpc" 12 - ) 13 - 14 - // RepoListRecords_Output is the output of a com.atproto.repo.listRecords call. 15 - type RepoListRecords_Output struct { 16 - Cursor *string `json:"cursor,omitempty" cborgen:"cursor,omitempty"` 17 - Records []*RepoListRecords_Record `json:"records" cborgen:"records"` 18 - } 19 - 20 - // RepoListRecords_Record is a "record" in the com.atproto.repo.listRecords schema. 21 - type RepoListRecords_Record struct { 22 - Cid string `json:"cid" cborgen:"cid"` 23 - Uri string `json:"uri" cborgen:"uri"` 24 - // NOTE: changed from lex decoder to json.RawMessage 25 - Value *json.RawMessage `json:"value" cborgen:"value"` 26 - } 27 - 28 - // RepoListRecords calls the XRPC method "com.atproto.repo.listRecords". 29 - // 30 - // collection: The NSID of the record type. 31 - // limit: The number of records to return. 32 - // repo: The handle or DID of the repo. 33 - // reverse: Flag to reverse the order of the returned records. 34 - // rkeyEnd: DEPRECATED: The highest sort-ordered rkey to stop at (exclusive) 35 - // rkeyStart: DEPRECATED: The lowest sort-ordered rkey to start from (exclusive) 36 - func RepoListRecords(ctx context.Context, c *xrpc.Client, collection string, cursor string, limit int64, repo string, reverse bool, rkeyEnd string, rkeyStart string) (*RepoListRecords_Output, error) { 37 - var out RepoListRecords_Output 38 - 39 - params := map[string]interface{}{ 40 - "collection": collection, 41 - "cursor": cursor, 42 - "limit": limit, 43 - "repo": repo, 44 - "reverse": reverse, 45 - "rkeyEnd": rkeyEnd, 46 - "rkeyStart": rkeyStart, 47 - } 48 - if err := c.Do(ctx, xrpc.Query, "", "com.atproto.repo.listRecords", params, nil, &out); err != nil { 49 - return nil, err 50 - } 51 - 52 - return &out, nil 53 - }
+2 -2
cmd/beemo/Dockerfile
··· 3 3 # podman build -f ./cmd/beemo/Dockerfile -t beemo . 4 4 5 5 ### Compile stage 6 - FROM golang:1.22-alpine3.19 AS build-env 6 + FROM golang:1.23-alpine3.20 AS build-env 7 7 RUN apk add --no-cache build-base make git 8 8 9 9 ADD . /dockerbuild ··· 15 15 go build -tags timetzdata -o /beemo ./cmd/beemo 16 16 17 17 ### Run stage 18 - FROM alpine:3.19 18 + FROM alpine:3.20 19 19 20 20 RUN apk add --no-cache --update dumb-init ca-certificates 21 21 ENTRYPOINT ["dumb-init", "--"]
+2 -20
cmd/beemo/firehose_consumer.go
··· 7 7 "log/slog" 8 8 "net/http" 9 9 "net/url" 10 - "strings" 11 10 12 11 comatproto "github.com/bluesky-social/indigo/api/atproto" 13 12 appbsky "github.com/bluesky-social/indigo/api/bsky" ··· 57 56 ) 58 57 logger.Info("beemo firehose scheduler configured", "scheduler", "parallel", "workers", parallelism) 59 58 60 - return events.HandleRepoStream(ctx, con, scheduler) 61 - } 62 - 63 - // TODO: move this to a "ParsePath" helper in syntax package? 64 - func splitRepoPath(path string) (syntax.NSID, syntax.RecordKey, error) { 65 - parts := strings.SplitN(path, "/", 3) 66 - if len(parts) != 2 { 67 - return "", "", fmt.Errorf("invalid record path: %s", path) 68 - } 69 - collection, err := syntax.ParseNSID(parts[0]) 70 - if err != nil { 71 - return "", "", err 72 - } 73 - rkey, err := syntax.ParseRecordKey(parts[1]) 74 - if err != nil { 75 - return "", "", err 76 - } 77 - return collection, rkey, nil 59 + return events.HandleRepoStream(ctx, con, scheduler, logger) 78 60 } 79 61 80 62 // NOTE: for now, this function basically never errors, just logs and returns nil. Should think through error processing better. ··· 102 84 103 85 for _, op := range evt.Ops { 104 86 logger = logger.With("eventKind", op.Action, "path", op.Path) 105 - collection, rkey, err := splitRepoPath(op.Path) 87 + collection, rkey, err := syntax.ParseRepoPath(op.Path) 106 88 if err != nil { 107 89 logger.Error("invalid path in repo op") 108 90 return nil
+1
cmd/beemo/notify_reports.go
··· 83 83 false, // hasComment bool 84 84 true, // includeAllUserRecords bool 85 85 limit, // limit int64 86 + nil, // policies []string 86 87 nil, // removedLabels []string 87 88 nil, // removedTags []string 88 89 nil, // reportTypes []string
+2 -2
cmd/bigsky/Dockerfile
··· 3 3 # podman build -f ./cmd/bigsky/Dockerfile -t bigsky . 4 4 5 5 ### Compile stage 6 - FROM golang:1.22-alpine3.19 AS build-env 6 + FROM golang:1.23-alpine3.20 AS build-env 7 7 RUN apk add --no-cache build-base make git 8 8 9 9 ADD . /dockerbuild ··· 26 26 RUN yarn build 27 27 28 28 ### Run stage 29 - FROM alpine:3.19 29 + FROM alpine:3.20 30 30 31 31 RUN apk add --no-cache --update dumb-init ca-certificates runit 32 32 ENTRYPOINT ["dumb-init", "--"]
+224
cmd/bigsky/copy_pdses.py
··· 1 + #!/usr/bin/env python3 2 + # 3 + # pip install requests 4 + # 5 + # python3 copy_pdses.py --admin-key hunter2 --source-url http://srcrelay:2470 --dest-url http://destrelay:2470 6 + 7 + import json 8 + import logging 9 + import sys 10 + import urllib.parse 11 + 12 + import requests 13 + 14 + logger = logging.getLogger(__name__) 15 + 16 + class relay: 17 + def __init__(self, rooturl, headers=None, session=None): 18 + "rooturl string, headers dict or None, session requests.Session() or None" 19 + self.rooturl = rooturl 20 + self.headers = headers or dict() 21 + self.session = session or requests.Session() 22 + 23 + def crawl(self, host): 24 + pheaders = dict(self.headers) 25 + pheaders['Content-Type'] = 'application/json' 26 + url = urllib.parse.urljoin(self.rooturl, '/admin/pds/requestCrawl') 27 + response = self.session.post(url, headers=pheaders, data=json.dumps({"hostname": host})) 28 + if response.status_code != 200: 29 + return False 30 + return True 31 + 32 + def crawlAndSetLimits(self, host, limits): 33 + "host string, limits dict" 34 + if not self.crawl(host): 35 + logger.error("%s %s : %d %r", url, host, response.status_code, response.text) 36 + return 37 + if limits is None: 38 + logger.debug("requestCrawl %s OK", host) 39 + if self.setLimits(host, limits): 40 + logger.debug("requestCrawl + changeLimits %s OK", host) 41 + def setLimits(self, host, limits): 42 + url = urllib.parse.urljoin(self.rooturl, '/admin/pds/changeLimits') 43 + plimits = dict(limits) 44 + plimits["host"] = host 45 + pheaders = dict(self.headers) 46 + pheaders['Content-Type'] = 'application/json' 47 + response = self.session.post(url, headers=pheaders, data=json.dumps(plimits)) 48 + if response.status_code != 200: 49 + logger.error("%s %s : %d %r", url, host, response.status_code, response.text) 50 + return False 51 + return True 52 + 53 + def crawlAndBlock(self, host): 54 + "make relay aware of PDS, and block it" 55 + if not self.crawl(host): 56 + logger.error("%s %s : %d %r", url, host, response.status_code, response.text) 57 + return 58 + if self.block(host): 59 + logger.debug("requestCrawl + block %s OK", host) 60 + 61 + def block(self, host): 62 + url = urllib.parse.urljoin(self.rooturl, '/admin/pds/block') 63 + response = self.session.post(url, headers=self.headers, data='', params={"host":host}) 64 + if response.status_code != 200: 65 + logger.error("%s %s : %d %r", url, host, response.status_code, response.text) 66 + return False 67 + return True 68 + 69 + def unblock(self, host): 70 + url = urllib.parse.urljoin(self.rooturl, '/admin/pds/unblock') 71 + response = self.session.post(url, headers=self.headers, data='', params={"host":host}) 72 + if response.status_code != 200: 73 + logger.error("%s %s : %d %r", url, host, response.status_code, response.text) 74 + return False 75 + return True 76 + 77 + def pdsList(self): 78 + "GET /admin/pds/list" 79 + url = urllib.parse.urljoin(self.rooturl, '/admin/pds/list') 80 + response = self.session.get(url, headers=self.headers) 81 + if response.status_code != 200: 82 + logger.error("%s : %d %r", url, response.status_code, response.text) 83 + return None 84 + return response.json() 85 + 86 + def makeByHost(they): 87 + out = dict() 88 + for rec in they: 89 + out[rec['Host']] = rec 90 + return out 91 + 92 + def makeLimits(rec): 93 + "for submitting to changeLimits" 94 + return { 95 + "host": rec['Host'], 96 + "per_second":rec['RateLimit'], 97 + "per_hour":rec['HourlyEventLimit'], 98 + "per_day":rec['DailyEventLimit'], 99 + "crawl_rate":rec['CrawlRateLimit'], 
100 + "repo_limit":rec['RepoLimit'], 101 + } 102 + 103 + def makeRequestCrawl(rec): 104 + "for submitting to requestCrawl" 105 + return {"hostname":rec["Host"]} 106 + 107 + def de(a,b): 108 + # dict equal 109 + for ka, va in a.items(): 110 + vb = b[ka] 111 + if (va is None) and (vb is None): 112 + continue 113 + if va == vb: 114 + continue 115 + return False 116 + for kb in b.keys(): 117 + if kb not in a: 118 + return False 119 + return True 120 + 121 + def main(): 122 + import argparse 123 + ap = argparse.ArgumentParser() 124 + ap.add_argument('--admin-key', default=None, help='relay auth bearer token', required=True) 125 + ap.add_argument('--source-url', default=None, help='base url to GET /admin/pds/list') 126 + ap.add_argument('--source-json', default=None, help='load /admin/pds/list json from file') 127 + ap.add_argument('--dest-url', default=None, help='dest URL to POST requestCrawl etc to') 128 + ap.add_argument('--dry-run', default=False, action='store_true') 129 + ap.add_argument('--verbose', default=False, action='store_true') 130 + args = ap.parse_args() 131 + 132 + if args.verbose: 133 + logging.basicConfig(level=logging.DEBUG) 134 + else: 135 + logging.basicConfig(level=logging.INFO) 136 + 137 + headers = {'Authorization': 'Bearer ' + args.admin_key} 138 + 139 + if args.source_json: 140 + with open(args.source_json, 'rt') as fin: 141 + sourceList = json.load(fin) 142 + elif args.source_url: 143 + relaySession = relay(args.source_url, headers) 144 + sourceList = relaySession.pdsList() 145 + else: 146 + sys.stdout.write("need --source-url or --source-json\n") 147 + sys.exit(1) 148 + 149 + r2 = relay(args.dest_url, headers) 150 + destList = r2.pdsList() 151 + 152 + source = makeByHost(sourceList) 153 + dests = makeByHost(destList) 154 + 155 + snotd = [] 156 + dnots = [] 157 + diflim = [] 158 + difblock = [] 159 + recrawl = [] 160 + 161 + for k1, v1 in source.items(): 162 + v2 = dests.get(k1) 163 + if v2 is None: 164 + snotd.append(v1) 165 + continue 166 + lim1 = makeLimits(v1) 167 + lim2 = makeLimits(v2) 168 + if v1["Blocked"] != v2["Blocked"]: 169 + difblock.append((k1,v1["Blocked"])) 170 + if v1["Blocked"]: 171 + continue 172 + if not de(lim1, lim2): 173 + diflim.append(lim1) 174 + if v1["HasActiveConnection"] and not v2["HasActiveConnection"]: 175 + recrawl.append(k1) 176 + for k2 in dests.keys(): 177 + if k2 not in source: 178 + dnots.append(k2) 179 + 180 + logger.debug("%d source not dest", len(snotd)) 181 + for rec in snotd: 182 + if rec["Blocked"]: 183 + if args.dry_run: 184 + sys.stdout.write("crawl and block: {!r}\n".format(rec["Host"])) 185 + else: 186 + r2.crawlAndBlock(rec["Host"]) 187 + else: 188 + limits = makeLimits(rec) 189 + if args.dry_run: 190 + sys.stdout.write("crawl and limit: {}\n".format(json.dumps(limits))) 191 + else: 192 + r2.crawlAndSetLimits(rec["Host"], limits) 193 + logger.debug("adjust limits: %d", len(diflim)) 194 + for limits in diflim: 195 + if args.dry_run: 196 + sys.stdout.write("set limits: {}\n".format(json.dumps(limits))) 197 + else: 198 + r2.setLimits(limits["host"], limits) 199 + logger.debug("adjust block status: %d", len(difblock)) 200 + for host, blocked in difblock: 201 + if args.dry_run: 202 + sys.stdout.write("{} block={}\n".format(host, blocked)) 203 + else: 204 + if blocked: 205 + r2.block(host) 206 + else: 207 + r2.unblock(host) 208 + logger.debug("restart requestCrawl: %d", len(recrawl)) 209 + for host in recrawl: 210 + if args.dry_run: 211 + logger.info("requestCrawl %s", host) 212 + else: 213 + if r2.crawl(host): 214 + 
logger.debug("requestCrawl %s OK", host) 215 + logger.info("%d in dest but not source", len(dnots)) 216 + for k2 in dnots: 217 + logger.debug("%s", k2) 218 + 219 + 220 + 221 + 222 + 223 + if __name__ == '__main__': 224 + main()
+111 -31
cmd/bigsky/main.go
··· 3 3 import ( 4 4 "context" 5 5 "fmt" 6 + "log/slog" 6 7 "net/http" 7 8 _ "net/http/pprof" 9 + "net/url" 8 10 "os" 9 11 "os/signal" 10 12 "path/filepath" ··· 29 31 _ "go.uber.org/automaxprocs" 30 32 31 33 "github.com/carlmjohnson/versioninfo" 32 - logging "github.com/ipfs/go-log" 33 34 "github.com/urfave/cli/v2" 34 35 "go.opentelemetry.io/otel" 35 36 "go.opentelemetry.io/otel/attribute" ··· 41 42 "gorm.io/plugin/opentelemetry/tracing" 42 43 ) 43 44 44 - var log = logging.Logger("bigsky") 45 + var log = slog.Default().With("system", "bigsky") 45 46 46 47 func init() { 47 48 // control log level using, eg, GOLOG_LOG_LEVEL=debug ··· 50 51 51 52 func main() { 52 53 if err := run(os.Args); err != nil { 53 - log.Fatal(err) 54 + slog.Error(err.Error()) 55 + os.Exit(1) 54 56 } 55 57 } 56 58 ··· 189 191 EnvVars: []string{"RELAY_DID_CACHE_SIZE"}, 190 192 Value: 5_000_000, 191 193 }, 194 + &cli.StringSliceFlag{ 195 + Name: "did-memcached", 196 + EnvVars: []string{"RELAY_DID_MEMCACHED"}, 197 + }, 192 198 &cli.DurationFlag{ 193 199 Name: "event-playback-ttl", 194 200 Usage: "time to live for event playback buffering (only applies to disk persister)", ··· 200 206 EnvVars: []string{"RELAY_NUM_COMPACTION_WORKERS"}, 201 207 Value: 2, 202 208 }, 209 + &cli.StringSliceFlag{ 210 + Name: "carstore-shard-dirs", 211 + Usage: "specify list of shard directories for carstore storage, overrides default storage within datadir", 212 + EnvVars: []string{"RELAY_CARSTORE_SHARD_DIRS"}, 213 + }, 214 + &cli.StringSliceFlag{ 215 + Name: "next-crawler", 216 + Usage: "forward POST requestCrawl to this url, should be machine root url and not xrpc/requestCrawl, comma separated list", 217 + EnvVars: []string{"RELAY_NEXT_CRAWLER"}, 218 + }, 219 + &cli.BoolFlag{ 220 + Name: "non-archival", 221 + EnvVars: []string{"RELAY_NON_ARCHIVAL"}, 222 + Value: false, 223 + }, 203 224 } 204 225 205 226 app.Action = runBigsky ··· 213 234 env = "dev" 214 235 } 215 236 if cctx.Bool("jaeger") { 216 - url := "http://localhost:14268/api/traces" 217 - exp, err := jaeger.New(jaeger.WithCollectorEndpoint(jaeger.WithEndpoint(url))) 237 + jaegerUrl := "http://localhost:14268/api/traces" 238 + exp, err := jaeger.New(jaeger.WithCollectorEndpoint(jaeger.WithEndpoint(jaegerUrl))) 218 239 if err != nil { 219 240 return err 220 241 } ··· 240 261 // At a minimum, you need to set 241 262 // OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 242 263 if ep := cctx.String("otel-exporter-otlp-endpoint"); ep != "" { 243 - log.Infow("setting up trace exporter", "endpoint", ep) 264 + slog.Info("setting up trace exporter", "endpoint", ep) 244 265 ctx, cancel := context.WithCancel(context.Background()) 245 266 defer cancel() 246 267 247 268 exp, err := otlptracehttp.New(ctx) 248 269 if err != nil { 249 - log.Fatalw("failed to create trace exporter", "error", err) 270 + slog.Error("failed to create trace exporter", "error", err) 271 + os.Exit(1) 250 272 } 251 273 defer func() { 252 274 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 253 275 defer cancel() 254 276 if err := exp.Shutdown(ctx); err != nil { 255 - log.Errorw("failed to shutdown trace exporter", "error", err) 277 + slog.Error("failed to shutdown trace exporter", "error", err) 256 278 } 257 279 }() 258 280 ··· 277 299 signals := make(chan os.Signal, 1) 278 300 signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM) 279 301 302 + _, err := cliutil.SetupSlog(cliutil.LogOptions{}) 303 + if err != nil { 304 + return err 305 + } 306 + 280 307 // start observability/tracing (OTEL and jaeger) 
281 308 if err := setupOTEL(cctx); err != nil { 282 309 return err ··· 289 316 return err 290 317 } 291 318 292 - log.Infow("setting up main database") 319 + slog.Info("setting up main database") 293 320 dburl := cctx.String("db-url") 294 321 db, err := cliutil.SetupDatabase(dburl, cctx.Int("max-metadb-connections")) 295 322 if err != nil { 296 323 return err 297 324 } 298 325 299 - log.Infow("setting up carstore database") 326 + slog.Info("setting up carstore database") 300 327 csdburl := cctx.String("carstore-db-url") 301 328 csdb, err := cliutil.SetupDatabase(csdburl, cctx.Int("max-carstore-connections")) 302 329 if err != nil { ··· 312 339 } 313 340 } 314 341 315 - os.MkdirAll(filepath.Dir(csdir), os.ModePerm) 316 - cstore, err := carstore.NewCarStore(csdb, csdir) 317 - if err != nil { 318 - return err 342 + csdirs := []string{csdir} 343 + if paramDirs := cctx.StringSlice("carstore-shard-dirs"); len(paramDirs) > 0 { 344 + csdirs = paramDirs 319 345 } 320 346 321 - mr := did.NewMultiResolver() 347 + for _, csd := range csdirs { 348 + if err := os.MkdirAll(filepath.Dir(csd), os.ModePerm); err != nil { 349 + return err 350 + } 351 + } 322 352 323 - didr := &api.PLCServer{Host: cctx.String("plc-host")} 324 - mr.AddHandler("plc", didr) 353 + var cstore carstore.CarStore 325 354 326 - webr := did.WebResolver{} 327 - if cctx.Bool("crawl-insecure-ws") { 328 - webr.Insecure = true 355 + if cctx.Bool("non-archival") { 356 + cs, err := carstore.NewNonArchivalCarstore(csdb) 357 + if err != nil { 358 + return err 359 + } 360 + 361 + cstore = cs 362 + } else { 363 + cs, err := carstore.NewCarStore(csdb, csdirs) 364 + if err != nil { 365 + return err 366 + } 367 + 368 + cstore = cs 329 369 } 330 - mr.AddHandler("web", &webr) 370 + 371 + // DID RESOLUTION 372 + // 1. the outside world, PLCSerever or Web 373 + // 2. (maybe memcached) 374 + // 3. 
in-process cache 375 + var cachedidr did.Resolver 376 + { 377 + mr := did.NewMultiResolver() 331 378 332 - cachedidr := plc.NewCachingDidResolver(mr, time.Hour*24, cctx.Int("did-cache-size")) 379 + didr := &api.PLCServer{Host: cctx.String("plc-host")} 380 + mr.AddHandler("plc", didr) 381 + 382 + webr := did.WebResolver{} 383 + if cctx.Bool("crawl-insecure-ws") { 384 + webr.Insecure = true 385 + } 386 + mr.AddHandler("web", &webr) 387 + 388 + var prevResolver did.Resolver 389 + memcachedServers := cctx.StringSlice("did-memcached") 390 + if len(memcachedServers) > 0 { 391 + prevResolver = plc.NewMemcachedDidResolver(mr, time.Hour*24, memcachedServers) 392 + } else { 393 + prevResolver = mr 394 + } 395 + 396 + cachedidr = plc.NewCachingDidResolver(prevResolver, time.Hour*24, cctx.Int("did-cache-size")) 397 + } 333 398 334 399 kmgr := indexer.NewKeyManager(cachedidr, nil) 335 400 ··· 338 403 var persister events.EventPersistence 339 404 340 405 if dpd := cctx.String("disk-persister-dir"); dpd != "" { 341 - log.Infow("setting up disk persister") 406 + slog.Info("setting up disk persister") 342 407 343 408 pOpts := events.DefaultDiskPersistOptions() 344 409 pOpts.Retention = cctx.Duration("event-playback-ttl") ··· 361 426 362 427 rf := indexer.NewRepoFetcher(db, repoman, cctx.Int("max-fetch-concurrency")) 363 428 364 - ix, err := indexer.NewIndexer(db, notifman, evtman, cachedidr, rf, true, cctx.Bool("spidering"), false) 429 + ix, err := indexer.NewIndexer(db, notifman, evtman, cachedidr, rf, true, false, cctx.Bool("spidering")) 365 430 if err != nil { 366 431 return err 367 432 } 433 + defer ix.Shutdown() 368 434 369 435 rlskip := cctx.String("bsky-social-rate-limit-skip") 370 436 ix.ApplyPDSClientSettings = func(c *xrpc.Client) { ··· 387 453 388 454 repoman.SetEventHandler(func(ctx context.Context, evt *repomgr.RepoEvent) { 389 455 if err := ix.HandleRepoEvent(ctx, evt); err != nil { 390 - log.Errorw("failed to handle repo event", "err", err) 456 + slog.Error("failed to handle repo event", "err", err) 391 457 } 392 458 }, false) 393 459 ··· 411 477 } 412 478 } 413 479 414 - log.Infow("constructing bgs") 480 + slog.Info("constructing bgs") 415 481 bgsConfig := libbgs.DefaultBGSConfig() 416 482 bgsConfig.SSL = !cctx.Bool("crawl-insecure-ws") 417 483 bgsConfig.CompactInterval = cctx.Duration("compact-interval") ··· 419 485 bgsConfig.MaxQueuePerPDS = cctx.Int64("max-queue-per-pds") 420 486 bgsConfig.DefaultRepoLimit = cctx.Int64("default-repo-limit") 421 487 bgsConfig.NumCompactionWorkers = cctx.Int("num-compaction-workers") 488 + nextCrawlers := cctx.StringSlice("next-crawler") 489 + if len(nextCrawlers) != 0 { 490 + nextCrawlerUrls := make([]*url.URL, len(nextCrawlers)) 491 + for i, tu := range nextCrawlers { 492 + var err error 493 + nextCrawlerUrls[i], err = url.Parse(tu) 494 + if err != nil { 495 + return fmt.Errorf("failed to parse next-crawler url: %w", err) 496 + } 497 + slog.Info("configuring relay for requestCrawl", "host", nextCrawlerUrls[i]) 498 + } 499 + bgsConfig.NextCrawlers = nextCrawlerUrls 500 + } 422 501 bgs, err := libbgs.NewBGS(db, ix, repoman, evtman, cachedidr, rf, hr, bgsConfig) 423 502 if err != nil { 424 503 return err ··· 433 512 // set up metrics endpoint 434 513 go func() { 435 514 if err := bgs.StartMetrics(cctx.String("metrics-listen")); err != nil { 436 - log.Fatalf("failed to start metrics endpoint: %s", err) 515 + log.Error("failed to start metrics endpoint", "err", err) 516 + os.Exit(1) 437 517 } 438 518 }() 439 519 ··· 444 524 bgsErr <- err 445 525 }() 446 526 
447 - log.Infow("startup complete") 527 + slog.Info("startup complete") 448 528 select { 449 529 case <-signals: 450 530 log.Info("received shutdown signal") 451 531 errs := bgs.Shutdown() 452 532 for err := range errs { 453 - log.Errorw("error during BGS shutdown", "err", err) 533 + slog.Error("error during BGS shutdown", "err", err) 454 534 } 455 535 case err := <-bgsErr: 456 536 if err != nil { 457 - log.Errorw("error during BGS startup", "err", err) 537 + slog.Error("error during BGS startup", "err", err) 458 538 } 459 539 log.Info("shutting down") 460 540 errs := bgs.Shutdown() 461 541 for err := range errs { 462 - log.Errorw("error during BGS shutdown", "err", err) 542 + slog.Error("error during BGS shutdown", "err", err) 463 543 } 464 544 } 465 545
+115
cmd/bigsky/resync_pdses.py
···
1 + #!/usr/bin/env python3
2 + #
3 + # pip install requests
4 + #
5 + # python3 resync_pdses.py --admin-key hunter2 --url http://myrelay:2470 host_per_line.txt
6 + 
7 + import json
8 + import sys
9 + import urllib.parse
10 + 
11 + import requests
12 + 
13 + 
14 + # pds limits for POST /admin/pds/changeLimits
15 + # {"host":"", "per_second": int, "per_hour": int, "per_day": int, "crawl_rate": int, "repo_limit": int}
16 + 
17 + limitsKeys = ('per_second', 'per_hour', 'per_day', 'crawl_rate', 'repo_limit')
18 + 
19 + def checkLimits(limits):
20 +     for k in limits.keys():
21 +         if k not in limitsKeys:
22 +             raise Exception(f"unknown pds rate limits key {k!r}")
23 +     return True
24 + 
25 + class relay:
26 +     def __init__(self, rooturl, headers=None, session=None):
27 +         "rooturl string, headers dict or None, session requests.Session() or None"
28 +         self.rooturl = rooturl
29 +         self.headers = headers or dict()
30 +         self.session = session or requests.Session()
31 + 
32 +     def resync(self, host):
33 +         "host string"
34 +         url = urllib.parse.urljoin(self.rooturl, '/admin/pds/resync')
35 +         response = self.session.post(url, params={"host": host}, headers=self.headers, data='')
36 +         if response.status_code != 200:
37 +             sys.stderr.write(f"{url}?host={host} : ({response.status_code}) ({response.text!r})\n")
38 +         else:
39 +             sys.stderr.write(f"{url}?host={host} : OK\n")
40 + 
41 +     def crawlAndSetLimits(self, host, limits):
42 +         "host string, limits dict"
43 +         pheaders = dict(self.headers)
44 +         pheaders['Content-Type'] = 'application/json'
45 +         url = urllib.parse.urljoin(self.rooturl, '/admin/pds/requestCrawl')
46 +         response = self.session.post(url, headers=pheaders, data=json.dumps({"hostname": host}))
47 +         if response.status_code != 200:
48 +             sys.stderr.write(f"{url} {host} : {response.status_code} {response.text!r}\n")
49 +             return
50 +         if limits is None:
51 +             sys.stderr.write(f"requestCrawl {host} OK\n"); return  # no limits to apply
52 +         url = urllib.parse.urljoin(self.rooturl, '/admin/pds/changeLimits')
53 +         plimits = dict(limits)
54 +         plimits["host"] = host
55 +         response = self.session.post(url, headers=pheaders, data=json.dumps(plimits))
56 +         if response.status_code != 200:
57 +             sys.stderr.write(f"{url} {host} : {response.status_code} {response.text!r}\n")
58 +             return
59 +         sys.stderr.write(f"requestCrawl + changeLimits {host} OK\n")
60 + 
61 + def main():
62 +     import argparse
63 +     ap = argparse.ArgumentParser()
64 +     ap.add_argument('input', default='-', help='host per line text file to read, - for stdin')
65 +     ap.add_argument('--admin-key', default=None, help='relay auth bearer token', required=True)
66 +     ap.add_argument('--url', default=None, help='base url to POST /admin/pds/resync', required=True)
67 +     ap.add_argument('--resync', default=False, action='store_true', help='resync selected PDSes')
68 +     ap.add_argument('--limits', default=None, help='json pds rate limits')
69 +     ap.add_argument('--crawl', default=False, action='store_true', help='crawl & set limits')
70 +     args = ap.parse_args()
71 + 
72 +     headers = {'Authorization': 'Bearer ' + args.admin_key}
73 + 
74 +     relaySession = relay(args.url, headers)
75 + 
76 +     #url = urllib.parse.urljoin(args.url, '/admin/pds/resync')
77 + 
78 +     #sess = requests.Session()
79 +     if args.crawl and args.resync:
80 +         sys.stderr.write("should only specify one of --resync --crawl")
81 +         sys.exit(1)
82 +     if (not args.crawl) and (not args.resync):
83 +         sys.stderr.write("should specify one of --resync --crawl")
84 +         sys.exit(1)
85 + 
86 +     limits = None
87 +     if args.limits:
88 +         limits = json.loads(args.limits)
89 +         checkLimits(limits)
90 + 
91 +     if args.input == '-':
92 +         fin = sys.stdin
93 +     else:
94 +         fin = open(args.input, 'rt')
95 +     for line in fin:
96 +         if not line:
97 +             continue
98 +         line = line.strip()
99 +         if not line:
100 +             continue
101 +         if line[0] == '#':
102 +             continue
103 +         host = line
104 +         if args.crawl:
105 +             relaySession.crawlAndSetLimits(host, limits)
106 +         elif args.resync:
107 +             relaySession.resync(host)
108 +         # response = sess.post(url, params={"host": line}, headers=headers)
109 +         # if response.status_code != 200:
110 +         #     sys.stderr.write(f"{url}?host={line} : ({response.status_code}) ({response.text!r})\n")
111 +         # else:
112 +         #     sys.stderr.write(f"{url}?host={line} : OK\n")
113 + 
114 + if __name__ == '__main__':
115 +     main()
+6 -1
cmd/goat/account.go
··· 38 38 EnvVars: []string{"ATP_AUTH_PASSWORD"}, 39 39 }, 40 40 &cli.StringFlag{ 41 + Name: "auth-factor-token", 42 + Usage: "token required if password is used and 2fa is required", 43 + EnvVars: []string{"ATP_AUTH_FACTOR_TOKEN"}, 44 + }, 45 + &cli.StringFlag{ 41 46 Name: "pds-host", 42 47 Usage: "URL of the PDS to create account on (overrides DID doc)", 43 48 EnvVars: []string{"ATP_PDS_HOST"}, ··· 163 168 return err 164 169 } 165 170 166 - _, err = refreshAuthSession(ctx, *username, cctx.String("app-password"), cctx.String("pds-host")) 171 + _, err = refreshAuthSession(ctx, *username, cctx.String("app-password"), cctx.String("pds-host"), cctx.String("auth-factor-token")) 167 172 return err 168 173 } 169 174
+7 -6
cmd/goat/account_migrate.go
··· 9 9 "strings" 10 10 "time" 11 11 12 + "github.com/bluesky-social/indigo/api/agnostic" 12 13 comatproto "github.com/bluesky-social/indigo/api/atproto" 13 14 "github.com/bluesky-social/indigo/atproto/syntax" 14 15 "github.com/bluesky-social/indigo/xrpc" ··· 166 167 167 168 slog.Info("migrating preferences") 168 169 // TODO: service proxy header for AppView? 169 - prefResp, err := ActorGetPreferences(ctx, oldClient) 170 + prefResp, err := agnostic.ActorGetPreferences(ctx, oldClient) 170 171 if err != nil { 171 172 return fmt.Errorf("failed fetching old preferences: %w", err) 172 173 } 173 - err = ActorPutPreferences(ctx, &newClient, &ActorPutPreferences_Input{ 174 + err = agnostic.ActorPutPreferences(ctx, &newClient, &agnostic.ActorPutPreferences_Input{ 174 175 Preferences: prefResp.Preferences, 175 176 }) 176 177 if err != nil { ··· 214 215 // NOTE: to work with did:web or non-PDS-managed did:plc, need to do manual migraiton process 215 216 slog.Info("updating identity to new host") 216 217 217 - credsResp, err := IdentityGetRecommendedDidCredentials(ctx, &newClient) 218 + credsResp, err := agnostic.IdentityGetRecommendedDidCredentials(ctx, &newClient) 218 219 if err != nil { 219 220 return fmt.Errorf("failed fetching new credentials: %w", err) 220 221 } ··· 223 224 return nil 224 225 } 225 226 226 - var unsignedOp IdentitySignPlcOperation_Input 227 + var unsignedOp agnostic.IdentitySignPlcOperation_Input 227 228 if err = json.Unmarshal(credsBytes, &unsignedOp); err != nil { 228 229 return fmt.Errorf("failed parsing PLC op: %w", err) 229 230 } ··· 231 232 232 233 // NOTE: could add additional sanity checks here that any extra rotation keys were retained, and that old alsoKnownAs and service entries are retained? The stakes aren't super high for the later, as PLC has the full history. PLC and the new PDS already implement some basic sanity checks. 233 234 234 - signedPlcOpResp, err := IdentitySignPlcOperation(ctx, oldClient, &unsignedOp) 235 + signedPlcOpResp, err := agnostic.IdentitySignPlcOperation(ctx, oldClient, &unsignedOp) 235 236 if err != nil { 236 237 return fmt.Errorf("failed requesting PLC operation signature: %w", err) 237 238 } 238 239 239 - err = IdentitySubmitPlcOperation(ctx, &newClient, &IdentitySubmitPlcOperation_Input{ 240 + err = agnostic.IdentitySubmitPlcOperation(ctx, &newClient, &agnostic.IdentitySubmitPlcOperation_Input{ 240 241 Operation: signedPlcOpResp.Operation, 241 242 }) 242 243 if err != nil {
+5 -4
cmd/goat/account_plc.go
··· 6 6 "fmt" 7 7 "os" 8 8 9 + "github.com/bluesky-social/indigo/api/agnostic" 9 10 comatproto "github.com/bluesky-social/indigo/api/atproto" 10 11 11 12 "github.com/urfave/cli/v2" ··· 56 57 return err 57 58 } 58 59 59 - resp, err := IdentityGetRecommendedDidCredentials(ctx, xrpcc) 60 + resp, err := agnostic.IdentityGetRecommendedDidCredentials(ctx, xrpcc) 60 61 if err != nil { 61 62 return err 62 63 } ··· 109 110 return err 110 111 } 111 112 112 - var body IdentitySignPlcOperation_Input 113 + var body agnostic.IdentitySignPlcOperation_Input 113 114 if err = json.Unmarshal(fileBytes, &body); err != nil { 114 115 return fmt.Errorf("failed decoding PLC op JSON: %w", err) 115 116 } ··· 119 120 body.Token = &token 120 121 } 121 122 122 - resp, err := IdentitySignPlcOperation(ctx, xrpcc, &body) 123 + resp, err := agnostic.IdentitySignPlcOperation(ctx, xrpcc, &body) 123 124 if err != nil { 124 125 return err 125 126 } ··· 158 159 return fmt.Errorf("failed decoding PLC op JSON: %w", err) 159 160 } 160 161 161 - err = IdentitySubmitPlcOperation(ctx, xrpcc, &IdentitySubmitPlcOperation_Input{ 162 + err = agnostic.IdentitySubmitPlcOperation(ctx, xrpcc, &agnostic.IdentitySubmitPlcOperation_Input{ 162 163 Operation: &op, 163 164 }) 164 165 if err != nil {
+2 -2
cmd/goat/actorgetPreferences.go api/agnostic/actorgetPreferences.go
··· 1 - // Copied from indigo:api/atproto/actorgetPreferences.go 1 + // Copied from indigo:api/bsky/actorgetPreferences.go 2 2 3 - package main 3 + package agnostic 4 4 5 5 // schema: app.bsky.actor.getPreferences 6 6
+2 -2
cmd/goat/actorputPreferences.go api/agnostic/actorputPreferences.go
··· 1 - // Copied from indigo:api/atproto/actorputPreferences.go 1 + // Copied from indigo:api/bsky/actorputPreferences.go 2 2 3 - package main 3 + package agnostic 4 4 5 5 // schema: app.bsky.actor.putPreferences 6 6
+10 -5
cmd/goat/auth.go
··· 77 77 } 78 78 resp, err := comatproto.ServerRefreshSession(ctx, &client) 79 79 if err != nil { 80 - // TODO: if failure, try creating a new session from password 80 + // TODO: if failure, try creating a new session from password (2fa tokens are only valid once, so not reused) 81 81 fmt.Println("trying to refresh auth from password...") 82 - as, err := refreshAuthSession(ctx, sess.DID.AtIdentifier(), sess.Password, sess.PDS) 82 + as, err := refreshAuthSession(ctx, sess.DID.AtIdentifier(), sess.Password, sess.PDS, "") 83 83 if err != nil { 84 84 return nil, err 85 85 } ··· 96 96 return &client, nil 97 97 } 98 98 99 - func refreshAuthSession(ctx context.Context, username syntax.AtIdentifier, password, pdsURL string) (*AuthSession, error) { 99 + func refreshAuthSession(ctx context.Context, username syntax.AtIdentifier, password, pdsURL, authFactorToken string) (*AuthSession, error) { 100 100 101 101 var did syntax.DID 102 102 if pdsURL == "" { ··· 120 120 client := xrpc.Client{ 121 121 Host: pdsURL, 122 122 } 123 + var token *string 124 + if authFactorToken != "" { 125 + token = &authFactorToken 126 + } 123 127 sess, err := comatproto.ServerCreateSession(ctx, &client, &comatproto.ServerCreateSession_Input{ 124 - Identifier: username.String(), 125 - Password: password, 128 + Identifier: username.String(), 129 + Password: password, 130 + AuthFactorToken: token, 126 131 }) 127 132 if err != nil { 128 133 return nil, err
+10 -1
cmd/goat/blob.go
··· 28 28 Aliases: []string{"o"}, 29 29 Usage: "directory to store blobs in", 30 30 }, 31 + &cli.StringFlag{ 32 + Name: "pds-host", 33 + Usage: "URL of the PDS to export blobs from (overrides DID doc)", 34 + }, 31 35 }, 32 36 Action: runBlobExport, 33 37 }, ··· 73 77 return err 74 78 } 75 79 80 + pdsHost := cctx.String("pds-host") 81 + if pdsHost == "" { 82 + pdsHost = ident.PDSEndpoint() 83 + } 84 + 76 85 // create a new API client to connect to the account's PDS 77 86 xrpcc := xrpc.Client{ 78 - Host: ident.PDSEndpoint(), 87 + Host: pdsHost, 79 88 } 80 89 if xrpcc.Host == "" { 81 90 return fmt.Errorf("no PDS endpoint for identity")
+4 -2
cmd/goat/bsky_prefs.go
··· 6 6 "fmt" 7 7 "os" 8 8 9 + "github.com/bluesky-social/indigo/api/agnostic" 10 + 9 11 "github.com/urfave/cli/v2" 10 12 ) 11 13 ··· 39 41 } 40 42 41 43 // TODO: does indigo API code crash with unsupported preference '$type'? Eg "Lexicon decoder" with unsupported type. 42 - resp, err := ActorGetPreferences(ctx, xrpcc) 44 + resp, err := agnostic.ActorGetPreferences(ctx, xrpcc) 43 45 if err != nil { 44 46 return fmt.Errorf("failed fetching old preferences: %w", err) 45 47 } ··· 77 79 return err 78 80 } 79 81 80 - err = ActorPutPreferences(ctx, xrpcc, &ActorPutPreferences_Input{ 82 + err = agnostic.ActorPutPreferences(ctx, xrpcc, &agnostic.ActorPutPreferences_Input{ 81 83 Preferences: prefsArray, 82 84 }) 83 85 if err != nil {
+2 -19
cmd/goat/firehose.go
··· 130 130 rsc.EventHandler, 131 131 ) 132 132 slog.Info("starting firehose consumer", "relayHost", relayHost) 133 - return events.HandleRepoStream(ctx, con, scheduler) 134 - } 135 - 136 - // TODO: move this to a "ParsePath" helper in syntax package? 137 - func splitRepoPath(path string) (syntax.NSID, syntax.RecordKey, error) { 138 - parts := strings.SplitN(path, "/", 3) 139 - if len(parts) != 2 { 140 - return "", "", fmt.Errorf("invalid record path: %s", path) 141 - } 142 - collection, err := syntax.ParseNSID(parts[0]) 143 - if err != nil { 144 - return "", "", err 145 - } 146 - rkey, err := syntax.ParseRecordKey(parts[1]) 147 - if err != nil { 148 - return "", "", err 149 - } 150 - return collection, rkey, nil 133 + return events.HandleRepoStream(ctx, con, scheduler, nil) 151 134 } 152 135 153 136 func (gfc *GoatFirehoseConsumer) handleIdentityEvent(ctx context.Context, evt *comatproto.SyncSubscribeRepos_Identity) error { ··· 229 212 } 230 213 231 214 for _, op := range evt.Ops { 232 - collection, rkey, err := splitRepoPath(op.Path) 215 + collection, rkey, err := syntax.ParseRepoPath(op.Path) 233 216 if err != nil { 234 217 logger.Error("invalid path in repo op", "eventKind", op.Action, "path", op.Path) 235 218 return nil
+20 -2
cmd/goat/identity.go
··· 15 15 Name: "resolve", 16 16 Usage: "lookup identity metadata", 17 17 ArgsUsage: `<at-identifier>`, 18 - Flags: []cli.Flag{}, 19 - Action: runResolve, 18 + Flags: []cli.Flag{ 19 + &cli.BoolFlag{ 20 + Name: "did", 21 + Usage: "just resolve to DID", 22 + }, 23 + }, 24 + Action: runResolve, 20 25 } 21 26 22 27 func runResolve(cctx *cli.Context) error { ··· 33 38 dir := identity.BaseDirectory{} 34 39 var doc *identity.DIDDocument 35 40 41 + if cctx.Bool("did") { 42 + if atid.IsDID() { 43 + } 44 + } 45 + 36 46 if atid.IsDID() { 37 47 did, err := atid.AsDID() 38 48 if err != nil { 39 49 return err 40 50 } 51 + if cctx.Bool("did") { 52 + fmt.Println(did) 53 + return nil 54 + } 41 55 doc, err = dir.ResolveDID(ctx, did) 42 56 if err != nil { 43 57 return err ··· 50 64 did, err := dir.ResolveHandle(ctx, handle) 51 65 if err != nil { 52 66 return err 67 + } 68 + if cctx.Bool("did") { 69 + fmt.Println(did) 70 + return nil 53 71 } 54 72 doc, err = dir.ResolveDID(ctx, did) 55 73 if err != nil {
+1 -1
cmd/goat/identitygetRecommendedDidCredentials.go api/agnostic/identitygetRecommendedDidCredentials.go
··· 1 1 // Copied from indigo:api/atproto/identitygetRecommendedDidCredentials.go 2 2 3 - package main 3 + package agnostic 4 4 5 5 // schema: com.atproto.identity.getRecommendedDidCredentials 6 6
+1 -1
cmd/goat/identitysignPlcOperation.go api/agnostic/identitysignPlcOperation.go
··· 1 1 // Copied from indigo:api/atproto/identitysignPlcOperation.go 2 2 3 - package main 3 + package agnostic 4 4 5 5 // schema: com.atproto.identity.signPlcOperation 6 6
+1 -1
cmd/goat/identitysubmitPlcOperation.go api/agnostic/identitysubmitPlcOperation.go
··· 1 1 // Copied from indigo:api/atproto/identitysubmitPlcOperation.go 2 2 3 - package main 3 + package agnostic 4 4 5 5 // schema: com.atproto.identity.submitPlcOperation 6 6
+2 -1
cmd/goat/net.go
··· 5 5 "fmt" 6 6 "log/slog" 7 7 8 + "github.com/bluesky-social/indigo/api/agnostic" 8 9 "github.com/bluesky-social/indigo/atproto/data" 9 10 "github.com/bluesky-social/indigo/atproto/identity" 10 11 "github.com/bluesky-social/indigo/atproto/syntax" ··· 17 18 xrpcc := xrpc.Client{ 18 19 Host: ident.PDSEndpoint(), 19 20 } 20 - resp, err := RepoGetRecord(ctx, &xrpcc, "", aturi.Collection().String(), ident.DID.String(), aturi.RecordKey().String()) 21 + resp, err := agnostic.RepoGetRecord(ctx, &xrpcc, "", aturi.Collection().String(), ident.DID.String(), aturi.RecordKey().String()) 21 22 if err != nil { 22 23 return nil, err 23 24 }
+5 -4
cmd/goat/record.go
··· 6 6 "fmt" 7 7 "os" 8 8 9 + "github.com/bluesky-social/indigo/api/agnostic" 9 10 comatproto "github.com/bluesky-social/indigo/api/atproto" 10 11 "github.com/bluesky-social/indigo/atproto/data" 11 12 "github.com/bluesky-social/indigo/atproto/identity" ··· 179 180 cursor := "" 180 181 for { 181 182 // collection string, cursor string, limit int64, repo string, reverse bool, rkeyEnd string, rkeyStart string 182 - resp, err := RepoListRecords(ctx, &xrpcc, nsid, cursor, 100, ident.DID.String(), false, "", "") 183 + resp, err := agnostic.RepoListRecords(ctx, &xrpcc, nsid, cursor, 100, ident.DID.String(), false, "", "") 183 184 if err != nil { 184 185 return err 185 186 } ··· 246 247 } 247 248 validate := !cctx.Bool("no-validate") 248 249 249 - resp, err := RepoCreateRecord(ctx, xrpcc, &RepoCreateRecord_Input{ 250 + resp, err := agnostic.RepoCreateRecord(ctx, xrpcc, &agnostic.RepoCreateRecord_Input{ 250 251 Collection: nsid, 251 252 Repo: xrpcc.Auth.Did, 252 253 Record: recordVal, ··· 293 294 rkey := cctx.String("rkey") 294 295 295 296 // NOTE: need to fetch existing record CID to perform swap. this is optional in theory, but golang can't deal with "optional" and "nullable", so we always need to set this (?) 296 - existing, err := RepoGetRecord(ctx, xrpcc, "", nsid, xrpcc.Auth.Did, rkey) 297 + existing, err := agnostic.RepoGetRecord(ctx, xrpcc, "", nsid, xrpcc.Auth.Did, rkey) 297 298 if err != nil { 298 299 return err 299 300 } ··· 305 306 306 307 validate := !cctx.Bool("no-validate") 307 308 308 - resp, err := RepoPutRecord(ctx, xrpcc, &RepoPutRecord_Input{ 309 + resp, err := agnostic.RepoPutRecord(ctx, xrpcc, &agnostic.RepoPutRecord_Input{ 309 310 Collection: nsid, 310 311 Repo: xrpcc.Auth.Did, 311 312 Record: recordVal,
+165 -5
cmd/goat/repo.go
··· 4 4 "bytes" 5 5 "context" 6 6 "encoding/json" 7 + "errors" 7 8 "fmt" 8 9 "os" 9 10 "path/filepath" 11 + "strings" 10 12 "time" 11 13 12 14 comatproto "github.com/bluesky-social/indigo/api/atproto" 13 15 "github.com/bluesky-social/indigo/atproto/data" 14 16 "github.com/bluesky-social/indigo/atproto/syntax" 17 + "github.com/bluesky-social/indigo/mst" 15 18 "github.com/bluesky-social/indigo/repo" 19 + "github.com/bluesky-social/indigo/util" 16 20 "github.com/bluesky-social/indigo/xrpc" 17 21 18 22 "github.com/ipfs/go-cid" 23 + cbor "github.com/ipfs/go-ipld-cbor" 24 + ipld "github.com/ipfs/go-ipld-format" 19 25 "github.com/urfave/cli/v2" 26 + "github.com/xlab/treeprint" 20 27 ) 21 28 22 29 var cmdRepo = &cli.Command{ ··· 59 66 Action: runRepoInspect, 60 67 }, 61 68 &cli.Command{ 69 + Name: "mst", 70 + Usage: "show repo MST structure", 71 + ArgsUsage: `<car-file>`, 72 + Flags: []cli.Flag{ 73 + &cli.BoolFlag{ 74 + Name: "full-cid", 75 + Aliases: []string{"f"}, 76 + Usage: "display full CIDs", 77 + }, 78 + &cli.StringFlag{ 79 + Name: "root", 80 + Aliases: []string{"r"}, 81 + Usage: "CID of root block", 82 + }, 83 + }, 84 + Action: runRepoMST, 85 + }, 86 + &cli.Command{ 62 87 Name: "unpack", 63 88 Usage: "extract records from CAR file as directory of JSON files", 64 89 ArgsUsage: `<car-file>`, ··· 93 118 return fmt.Errorf("no PDS endpoint for identity") 94 119 } 95 120 121 + // set longer timeout, for large CAR files 122 + xrpcc.Client = util.RobustHTTPClient() 123 + xrpcc.Client.Timeout = 600 * time.Second 124 + 96 125 carPath := cctx.String("output") 97 126 if carPath == "" { 98 127 // NOTE: having the rev in the the path might be nice 99 128 now := time.Now().Format("20060102150405") 100 129 carPath = fmt.Sprintf("%s.%s.car", username, now) 101 130 } 102 - // NOTE: there is a race condition, but nice to give a friendly error earlier before downloading 103 - if _, err := os.Stat(carPath); err == nil { 104 - return fmt.Errorf("file already exists: %s", carPath) 131 + output, err := getFileOrStdout(carPath) 132 + if err != nil { 133 + if errors.Is(err, os.ErrExist) { 134 + return fmt.Errorf("file already exists: %s", carPath) 135 + } 136 + return err 105 137 } 106 - fmt.Printf("downloading from %s to: %s\n", xrpcc.Host, carPath) 138 + defer output.Close() 139 + if carPath != stdIOPath { 140 + fmt.Printf("downloading from %s to: %s\n", xrpcc.Host, carPath) 141 + } 107 142 repoBytes, err := comatproto.SyncGetRepo(ctx, &xrpcc, ident.DID.String(), "") 108 143 if err != nil { 109 144 return err 110 145 } 111 - return os.WriteFile(carPath, repoBytes, 0666) 146 + if _, err := output.Write(repoBytes); err != nil { 147 + return err 148 + } 149 + return nil 112 150 } 113 151 114 152 func runRepoImport(cctx *cli.Context) error { ··· 192 230 // TODO: Signature? 
193 231 194 232 return nil 233 + } 234 + 235 + func runRepoMST(cctx *cli.Context) error { 236 + ctx := context.Background() 237 + opts := repoMSTOptions{ 238 + carPath: cctx.Args().First(), 239 + fullCID: cctx.Bool("full-cid"), 240 + root: cctx.String("root"), 241 + } 242 + // read from file or stdin 243 + if opts.carPath == "" { 244 + return fmt.Errorf("need to provide path to CAR file as argument") 245 + } 246 + inputCAR, err := getFileOrStdin(opts.carPath) 247 + if err != nil { 248 + return err 249 + } 250 + // read repository tree in to memory 251 + r, err := repo.ReadRepoFromCar(ctx, inputCAR) 252 + if err != nil { 253 + return err 254 + } 255 + cst := util.CborStore(r.Blockstore()) 256 + // determine which root cid to use, defaulting to repo data root 257 + rootCID := r.DataCid() 258 + if opts.root != "" { 259 + optsRootCID, err := cid.Decode(opts.root) 260 + if err != nil { 261 + return err 262 + } 263 + rootCID = optsRootCID 264 + } 265 + // start walking mst 266 + exists, err := nodeExists(ctx, cst, rootCID) 267 + if err != nil { 268 + return err 269 + } 270 + tree := treeprint.NewWithRoot(displayCID(&rootCID, exists, opts)) 271 + if exists { 272 + if err := walkMST(ctx, cst, rootCID, tree, opts); err != nil { 273 + return err 274 + } 275 + } 276 + // print tree 277 + fmt.Println(tree.String()) 278 + return nil 279 + } 280 + 281 + func walkMST(ctx context.Context, cst *cbor.BasicIpldStore, cid cid.Cid, tree treeprint.Tree, opts repoMSTOptions) error { 282 + var node mst.NodeData 283 + if err := cst.Get(ctx, cid, &node); err != nil { 284 + return err 285 + } 286 + if node.Left != nil { 287 + exists, err := nodeExists(ctx, cst, *node.Left) 288 + if err != nil { 289 + return err 290 + } 291 + subtree := tree.AddBranch(displayCID(node.Left, exists, opts)) 292 + if exists { 293 + if err := walkMST(ctx, cst, *node.Left, subtree, opts); err != nil { 294 + return err 295 + } 296 + } 297 + } 298 + for _, entry := range node.Entries { 299 + exists, err := nodeExists(ctx, cst, entry.Val) 300 + if err != nil { 301 + return err 302 + } 303 + tree.AddNode(displayEntryVal(&entry, exists, opts)) 304 + if entry.Tree != nil { 305 + exists, err := nodeExists(ctx, cst, *entry.Tree) 306 + if err != nil { 307 + return err 308 + } 309 + subtree := tree.AddBranch(displayCID(entry.Tree, exists, opts)) 310 + if exists { 311 + if err := walkMST(ctx, cst, *entry.Tree, subtree, opts); err != nil { 312 + return err 313 + } 314 + } 315 + } 316 + } 317 + return nil 318 + } 319 + 320 + func displayEntryVal(entry *mst.TreeEntry, exists bool, opts repoMSTOptions) string { 321 + key := string(entry.KeySuffix) 322 + divider := " " 323 + if opts.fullCID { 324 + divider = "\n" 325 + } 326 + return strings.Repeat("∙", int(entry.PrefixLen)) + key + divider + displayCID(&entry.Val, exists, opts) 327 + } 328 + 329 + func displayCID(cid *cid.Cid, exists bool, opts repoMSTOptions) string { 330 + cidDisplay := cid.String() 331 + if !opts.fullCID { 332 + cidDisplay = "…" + string(cidDisplay[len(cidDisplay)-7:]) 333 + } 334 + connector := "─◉" 335 + if !exists { 336 + connector = "─◌" 337 + } 338 + return "[" + cidDisplay + "]" + connector 339 + } 340 + 341 + type repoMSTOptions struct { 342 + carPath string 343 + fullCID bool 344 + root string 345 + } 346 + 347 + func nodeExists(ctx context.Context, cst *cbor.BasicIpldStore, cid cid.Cid) (bool, error) { 348 + if _, err := cst.Blocks.Get(ctx, cid); err != nil { 349 + if errors.Is(err, ipld.ErrNotFound{}) { 350 + return false, nil 351 + } 352 + return false, err 353 + } 354 + 
return true, nil 195 355 } 196 356 197 357 func runRepoUnpack(cctx *cli.Context) error {
+1 -1
cmd/goat/repocreateRecord.go api/agnostic/repocreateRecord.go
··· 1 1 // Copied from indigo:api/atproto/repocreateRecords.go 2 2 3 - package main 3 + package agnostic 4 4 5 5 // schema: com.atproto.repo.createRecord 6 6
+1 -1
cmd/goat/repogetRecord.go api/agnostic/repogetRecord.go
··· 1 1 // Copied from indigo:api/atproto/repolistRecords.go 2 2 3 - package main 3 + package agnostic 4 4 5 5 // schema: com.atproto.repo.getRecord 6 6
+1 -1
cmd/goat/repolistRecords.go api/agnostic/repolistRecords.go
··· 1 1 // Copied from indigo:api/atproto/repolistRecords.go 2 2 3 - package main 3 + package agnostic 4 4 5 5 // schema: com.atproto.repo.listRecords 6 6
+1 -1
cmd/goat/repoputRecord.go api/agnostic/repoputRecord.go
··· 1 1 // Copied from indigo:api/atproto/repoputRecords.go 2 2 3 - package main 3 + package agnostic 4 4 5 5 // schema: com.atproto.repo.putRecord 6 6
+26
cmd/goat/util.go
··· 2 2 3 3 import ( 4 4 "context" 5 + "io" 6 + "os" 5 7 6 8 "github.com/bluesky-social/indigo/atproto/identity" 7 9 "github.com/bluesky-social/indigo/atproto/syntax" ··· 16 18 dir := identity.DefaultDirectory() 17 19 return dir.Lookup(ctx, *id) 18 20 } 21 + 22 + const stdIOPath = "-" 23 + 24 + func getFileOrStdin(path string) (io.Reader, error) { 25 + if path == stdIOPath { 26 + return os.Stdin, nil 27 + } 28 + file, err := os.Open(path) 29 + if err != nil { 30 + return nil, err 31 + } 32 + return file, nil 33 + } 34 + 35 + func getFileOrStdout(path string) (io.WriteCloser, error) { 36 + if path == stdIOPath { 37 + return os.Stdout, nil 38 + } 39 + file, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0666) 40 + if err != nil { 41 + return nil, err 42 + } 43 + return file, nil 44 + }
+2
cmd/gosky/admin.go
··· 403 403 false, // hasComment bool 404 404 false, // includeAllUserRecords bool 405 405 100, // limit int64 406 + nil, // policies []string 406 407 nil, // removedLabels []string 407 408 nil, // removedTags []string 408 409 nil, // reportTypes []string ··· 717 718 false, // hasComment bool 718 719 false, // includeAllUserRecords bool 719 720 100, // limit int64 721 + nil, // policies []string 720 722 nil, // removedLabels []string 721 723 nil, // removedTags []string 722 724 nil, // reportTypes []string
+1 -1
cmd/gosky/bsky.go
··· 335 335 return err 336 336 } 337 337 338 - notifs, err := appbsky.NotificationListNotifications(ctx, xrpcc, "", 50, false, "") 338 + notifs, err := appbsky.NotificationListNotifications(ctx, xrpcc, "", 50, false, nil, "") 339 339 if err != nil { 340 340 return err 341 341 }
+2 -2
cmd/gosky/car.go
··· 64 64 if topDir == "" { 65 65 topDir = did.String() 66 66 } 67 - log.Infof("writing output to: %s", topDir) 67 + log.Info("writing output", "topDir", topDir) 68 68 69 69 commitPath := topDir + "/_commit" 70 70 os.MkdirAll(filepath.Dir(commitPath), os.ModePerm) ··· 90 90 if err != nil { 91 91 return err 92 92 } 93 - log.Debugf("processing record: %s", k) 93 + log.Debug("processing record", "rec", k) 94 94 95 95 // TODO: check if path is safe more carefully 96 96 recPath := topDir + "/" + k
+15 -9
cmd/gosky/debug.go
··· 106 106 } 107 107 108 108 seqScheduler := sequential.NewScheduler("debug-inspect-event", rsc.EventHandler) 109 - err = events.HandleRepoStream(ctx, con, seqScheduler) 109 + err = events.HandleRepoStream(ctx, con, seqScheduler, nil) 110 110 if err != errFoundIt { 111 111 return err 112 112 } ··· 284 284 }, 285 285 } 286 286 seqScheduler := sequential.NewScheduler("debug-stream", rsc.EventHandler) 287 - err = events.HandleRepoStream(ctx, con, seqScheduler) 287 + err = events.HandleRepoStream(ctx, con, seqScheduler, nil) 288 288 if err != nil { 289 289 return err 290 290 } ··· 390 390 go func(i int, url string) { 391 391 con, _, err := d.Dial(url, http.Header{}) 392 392 if err != nil { 393 - log.Fatalf("Dial failure on url%d: %s", i+1, err) 393 + log.Error("Dial failure", "i", i, "url", url, "err", err) 394 + os.Exit(1) 394 395 } 395 396 396 397 ctx := context.TODO() ··· 405 406 }, 406 407 } 407 408 seqScheduler := sequential.NewScheduler(fmt.Sprintf("debug-stream-%d", i+1), rsc.EventHandler) 408 - if err := events.HandleRepoStream(ctx, con, seqScheduler); err != nil { 409 - log.Fatalf("HandleRepoStream failure on url%d: %s", i+1, err) 409 + if err := events.HandleRepoStream(ctx, con, seqScheduler, nil); err != nil { 410 + log.Error("HandleRepoStream failure", "i", i, "url", url, "err", err) 411 + os.Exit(1) 410 412 } 411 413 }(i, url) 412 414 } ··· 876 878 logger := log.With("host", cctx.String("host-1")) 877 879 repo1bytes, err := comatproto.SyncGetRepo(ctx, &xrpc1, did.String(), "") 878 880 if err != nil { 879 - logger.Fatalf("getting repo: %s", err) 881 + logger.Error("getting repo", "err", err) 882 + os.Exit(1) 880 883 return 881 884 } 882 885 883 886 rep1, err = repo.ReadRepoFromCar(ctx, bytes.NewReader(repo1bytes)) 884 887 if err != nil { 885 - logger.Fatalf("reading repo: %s", err) 888 + logger.Error("reading repo", "err", err) 889 + os.Exit(1) 886 890 return 887 891 } 888 892 }() ··· 893 897 logger := log.With("host", cctx.String("host-2")) 894 898 repo2bytes, err := comatproto.SyncGetRepo(ctx, &xrpc2, did.String(), "") 895 899 if err != nil { 896 - logger.Fatalf("getting repo: %s", err) 900 + logger.Error("getting repo", "err", err) 901 + os.Exit(1) 897 902 return 898 903 } 899 904 900 905 rep2, err = repo.ReadRepoFromCar(ctx, bytes.NewReader(repo2bytes)) 901 906 if err != nil { 902 - logger.Fatalf("reading repo: %s", err) 907 + logger.Error("reading repo", "err", err) 908 + os.Exit(1) 903 909 return 904 910 } 905 911 }()
+25 -4
cmd/gosky/main.go
··· 7 7 "encoding/json" 8 8 "fmt" 9 9 "io" 10 + "log/slog" 10 11 "net/http" 11 12 "os" 12 13 "os/signal" ··· 18 19 "github.com/bluesky-social/indigo/api/atproto" 19 20 comatproto "github.com/bluesky-social/indigo/api/atproto" 20 21 "github.com/bluesky-social/indigo/api/bsky" 22 + "github.com/bluesky-social/indigo/atproto/identity" 21 23 "github.com/bluesky-social/indigo/atproto/syntax" 22 24 "github.com/bluesky-social/indigo/events" 23 25 "github.com/bluesky-social/indigo/events/schedulers/sequential" ··· 38 40 _ "github.com/joho/godotenv/autoload" 39 41 40 42 "github.com/carlmjohnson/versioninfo" 41 - logging "github.com/ipfs/go-log" 42 43 "github.com/polydawn/refmt/cbor" 43 44 rejson "github.com/polydawn/refmt/json" 44 45 "github.com/polydawn/refmt/shared" 45 46 cli "github.com/urfave/cli/v2" 46 47 ) 47 48 48 - var log = logging.Logger("gosky") 49 + var log = slog.Default().With("system", "gosky") 49 50 50 51 func main() { 51 52 run(os.Args) ··· 79 80 EnvVars: []string{"ATP_PLC_HOST"}, 80 81 }, 81 82 } 83 + 84 + _, err := cliutil.SetupSlog(cliutil.LogOptions{}) 85 + if err != nil { 86 + fmt.Fprintf(os.Stderr, "logging setup error: %s\n", err.Error()) 87 + os.Exit(1) 88 + return 89 + } 90 + 82 91 app.Commands = []*cli.Command{ 83 92 accountCmd, 84 93 adminCmd, ··· 338 347 }, 339 348 } 340 349 seqScheduler := sequential.NewScheduler(con.RemoteAddr().String(), rsc.EventHandler) 341 - return events.HandleRepoStream(ctx, con, seqScheduler) 350 + return events.HandleRepoStream(ctx, con, seqScheduler, log) 342 351 }, 343 352 } 344 353 ··· 462 471 return fmt.Errorf("unrecognized link") 463 472 } 464 473 474 + atid, err := syntax.ParseAtIdentifier(did) 475 + if err != nil { 476 + return err 477 + } 478 + 479 + resp, err := identity.DefaultDirectory().Lookup(ctx, *atid) 480 + if err != nil { 481 + return err 482 + } 483 + 484 + xrpcc.Host = resp.PDSEndpoint() 485 + 465 486 out, err := comatproto.RepoGetRecord(ctx, xrpcc, "", collection, did, rkey) 466 487 if err != nil { 467 488 return err ··· 490 511 491 512 rc, rec, err := rr.GetRecord(ctx, cctx.Args().First()) 492 513 if err != nil { 493 - return err 514 + return fmt.Errorf("get record failed: %w", err) 494 515 } 495 516 496 517 if cctx.Bool("raw") {
+4 -4
cmd/gosky/streamdiff.go
··· 58 58 }, 59 59 } 60 60 seqScheduler := sequential.NewScheduler("streamA", rsc.EventHandler) 61 - err = events.HandleRepoStream(ctx, cona, seqScheduler) 61 + err = events.HandleRepoStream(ctx, cona, seqScheduler, log) 62 62 if err != nil { 63 - log.Errorf("stream A failed: %s", err) 63 + log.Error("stream A failed", "err", err) 64 64 } 65 65 }() 66 66 ··· 82 82 } 83 83 84 84 seqScheduler := sequential.NewScheduler("streamB", rsc.EventHandler) 85 - err = events.HandleRepoStream(ctx, conb, seqScheduler) 85 + err = events.HandleRepoStream(ctx, conb, seqScheduler, log) 86 86 if err != nil { 87 - log.Errorf("stream B failed: %s", err) 87 + log.Error("stream B failed", "err", err) 88 88 } 89 89 }() 90 90
+1 -1
cmd/gosky/sync.go
··· 66 66 xrpcc.Host = h 67 67 } 68 68 69 - log.Infof("downloading from %s to: %s", xrpcc.Host, carPath) 69 + log.Info("downloading", "from", xrpcc.Host, "to", carPath) 70 70 repoBytes, err := comatproto.SyncGetRepo(ctx, xrpcc, ident.DID.String(), "") 71 71 if err != nil { 72 72 return err
+2 -2
cmd/hepa/Dockerfile
··· 3 3 # podman build -f ./cmd/hepa/Dockerfile -t hepa . 4 4 5 5 ### Compile stage 6 - FROM golang:1.22-alpine3.19 AS build-env 6 + FROM golang:1.23-alpine3.20 AS build-env 7 7 RUN apk add --no-cache build-base make git 8 8 9 9 ADD . /dockerbuild ··· 15 15 go build -tags timetzdata -o /hepa ./cmd/hepa 16 16 17 17 ### Run stage 18 - FROM alpine:3.19 18 + FROM alpine:3.20 19 19 20 20 RUN apk add --no-cache --update dumb-init ca-certificates 21 21 ENTRYPOINT ["dumb-init", "--"]
+44 -20
cmd/hepa/main.go
··· 149 149 Usage: "secret token for prescreen server", 150 150 EnvVars: []string{"HEPA_PRESCREEN_TOKEN"}, 151 151 }, 152 + &cli.DurationFlag{ 153 + Name: "report-dupe-period", 154 + Usage: "time period within which automod will not re-report an account for the same reasonType", 155 + EnvVars: []string{"HEPA_REPORT_DUPE_PERIOD"}, 156 + Value: 1 * 24 * time.Hour, 157 + }, 158 + &cli.IntFlag{ 159 + Name: "quota-mod-report-day", 160 + Usage: "number of reports automod can file per day, for all subjects and types combined (circuit breaker)", 161 + EnvVars: []string{"HEPA_QUOTA_MOD_REPORT_DAY"}, 162 + Value: 10000, 163 + }, 164 + &cli.IntFlag{ 165 + Name: "quota-mod-takedown-day", 166 + Usage: "number of takedowns automod can action per day, for all subjects combined (circuit breaker)", 167 + EnvVars: []string{"HEPA_QUOTA_MOD_TAKEDOWN_DAY"}, 168 + Value: 200, 169 + }, 170 + &cli.IntFlag{ 171 + Name: "quota-mod-action-day", 172 + Usage: "number of misc actions automod can do per day, for all subjects combined (circuit breaker)", 173 + EnvVars: []string{"HEPA_QUOTA_MOD_ACTION_DAY"}, 174 + Value: 2000, 175 + }, 152 176 } 153 177 154 178 app.Commands = []*cli.Command{ ··· 237 261 dir, 238 262 Config{ 239 263 Logger: logger, 240 - RelayHost: cctx.String("atp-relay-host"), // DEPRECATED 241 264 BskyHost: cctx.String("atp-bsky-host"), 242 265 OzoneHost: cctx.String("atp-ozone-host"), 243 266 OzoneDID: cctx.String("ozone-did"), ··· 252 275 AbyssPassword: cctx.String("abyss-password"), 253 276 RatelimitBypass: cctx.String("ratelimit-bypass"), 254 277 RulesetName: cctx.String("ruleset"), 255 - FirehoseParallelism: cctx.Int("firehose-parallelism"), // DEPRECATED 256 278 PreScreenHost: cctx.String("prescreen-host"), 257 279 PreScreenToken: cctx.String("prescreen-token"), 280 + ReportDupePeriod: cctx.Duration("report-dupe-period"), 281 + QuotaModReportDay: cctx.Int("quota-mod-report-day"), 282 + QuotaModTakedownDay: cctx.Int("quota-mod-takedown-day"), 283 + QuotaModActionDay: cctx.Int("quota-mod-action-day"), 258 284 }, 259 285 ) 260 286 if err != nil { ··· 332 358 return NewServer( 333 359 dir, 334 360 Config{ 335 - Logger: logger, 336 - RelayHost: cctx.String("atp-relay-host"), 337 - BskyHost: cctx.String("atp-bsky-host"), 338 - OzoneHost: cctx.String("atp-ozone-host"), 339 - OzoneDID: cctx.String("ozone-did"), 340 - OzoneAdminToken: cctx.String("ozone-admin-token"), 341 - PDSHost: cctx.String("atp-pds-host"), 342 - PDSAdminToken: cctx.String("pds-admin-token"), 343 - SetsFileJSON: cctx.String("sets-json-path"), 344 - RedisURL: cctx.String("redis-url"), 345 - HiveAPIToken: cctx.String("hiveai-api-token"), 346 - AbyssHost: cctx.String("abyss-host"), 347 - AbyssPassword: cctx.String("abyss-password"), 348 - RatelimitBypass: cctx.String("ratelimit-bypass"), 349 - RulesetName: cctx.String("ruleset"), 350 - FirehoseParallelism: cctx.Int("firehose-parallelism"), 351 - PreScreenHost: cctx.String("prescreen-host"), 352 - PreScreenToken: cctx.String("prescreen-token"), 361 + Logger: logger, 362 + BskyHost: cctx.String("atp-bsky-host"), 363 + OzoneHost: cctx.String("atp-ozone-host"), 364 + OzoneDID: cctx.String("ozone-did"), 365 + OzoneAdminToken: cctx.String("ozone-admin-token"), 366 + PDSHost: cctx.String("atp-pds-host"), 367 + PDSAdminToken: cctx.String("pds-admin-token"), 368 + SetsFileJSON: cctx.String("sets-json-path"), 369 + RedisURL: cctx.String("redis-url"), 370 + HiveAPIToken: cctx.String("hiveai-api-token"), 371 + AbyssHost: cctx.String("abyss-host"), 372 + AbyssPassword: cctx.String("abyss-password"), 
373 + RatelimitBypass: cctx.String("ratelimit-bypass"), 374 + RulesetName: cctx.String("ruleset"), 375 + PreScreenHost: cctx.String("prescreen-host"), 376 + PreScreenToken: cctx.String("prescreen-token"), 353 377 }, 354 378 ) 355 379 }
+16 -17
cmd/hepa/server.go
··· 6 6 "log/slog" 7 7 "net/http" 8 8 "os" 9 - "strings" 10 9 "time" 11 10 12 11 "github.com/bluesky-social/indigo/atproto/identity" ··· 14 13 "github.com/bluesky-social/indigo/automod" 15 14 "github.com/bluesky-social/indigo/automod/cachestore" 16 15 "github.com/bluesky-social/indigo/automod/countstore" 16 + "github.com/bluesky-social/indigo/automod/engine" 17 17 "github.com/bluesky-social/indigo/automod/flagstore" 18 18 "github.com/bluesky-social/indigo/automod/rules" 19 19 "github.com/bluesky-social/indigo/automod/setstore" ··· 29 29 Engine *automod.Engine 30 30 RedisClient *redis.Client 31 31 32 - relayHost string // DEPRECATED 33 - firehoseParallelism int // DEPRECATED 34 - logger *slog.Logger 32 + logger *slog.Logger 35 33 } 36 34 37 35 type Config struct { 38 36 Logger *slog.Logger 39 - RelayHost string // DEPRECATED 40 37 BskyHost string 41 38 OzoneHost string 42 39 OzoneDID string ··· 51 48 AbyssPassword string 52 49 RulesetName string 53 50 RatelimitBypass string 54 - FirehoseParallelism int // DEPRECATED 55 51 PreScreenHost string 56 52 PreScreenToken string 53 + ReportDupePeriod time.Duration 54 + QuotaModReportDay int 55 + QuotaModTakedownDay int 56 + QuotaModActionDay int 57 57 } 58 58 59 59 func NewServer(dir identity.Directory, config Config) (*Server, error) { ··· 62 62 logger = slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ 63 63 Level: slog.LevelInfo, 64 64 })) 65 - } 66 - 67 - relayws := config.RelayHost 68 - if !strings.HasPrefix(relayws, "ws") { 69 - return nil, fmt.Errorf("specified relay host must include 'ws://' or 'wss://'") 70 65 } 71 66 72 67 var ozoneClient *xrpc.Client ··· 206 201 bskyClient.Headers["x-ratelimit-bypass"] = config.RatelimitBypass 207 202 } 208 203 blobClient := util.RobustHTTPClient() 209 - engine := automod.Engine{ 204 + eng := automod.Engine{ 210 205 Logger: logger, 211 206 Directory: dir, 212 207 Counters: counters, ··· 219 214 OzoneClient: ozoneClient, 220 215 AdminClient: adminClient, 221 216 BlobClient: blobClient, 217 + Config: engine.EngineConfig{ 218 + ReportDupePeriod: config.ReportDupePeriod, 219 + QuotaModReportDay: config.QuotaModReportDay, 220 + QuotaModTakedownDay: config.QuotaModTakedownDay, 221 + QuotaModActionDay: config.QuotaModActionDay, 222 + }, 222 223 } 223 224 224 225 s := &Server{ 225 - relayHost: config.RelayHost, 226 - firehoseParallelism: config.FirehoseParallelism, 227 - logger: logger, 228 - Engine: &engine, 229 - RedisClient: rdb, 226 + logger: logger, 227 + Engine: &eng, 228 + RedisClient: rdb, 230 229 } 231 230 232 231 return s, nil
+1 -4
cmd/laputa/main.go
··· 14 14 _ "go.uber.org/automaxprocs" 15 15 16 16 "github.com/carlmjohnson/versioninfo" 17 - logging "github.com/ipfs/go-log" 18 17 "github.com/urfave/cli/v2" 19 18 "go.opentelemetry.io/otel" 20 19 "go.opentelemetry.io/otel/attribute" ··· 24 23 semconv "go.opentelemetry.io/otel/semconv/v1.4.0" 25 24 "gorm.io/plugin/opentelemetry/tracing" 26 25 ) 27 - 28 - var log = logging.Logger("laputa") 29 26 30 27 func main() { 31 28 run(os.Args) ··· 158 155 } 159 156 } 160 157 161 - cstore, err := carstore.NewCarStore(csdb, csdir) 158 + cstore, err := carstore.NewCarStore(csdb, []string{csdir}) 162 159 if err != nil { 163 160 return err 164 161 }
+2 -2
cmd/netsync/main.go
··· 345 345 Handler: mux, 346 346 } 347 347 348 + state.wg.Add(1) 348 349 go func() { 349 - state.wg.Add(1) 350 350 defer state.wg.Done() 351 351 if err := metricsServer.ListenAndServe(); err != http.ErrServerClosed { 352 352 logger.Error("failed to start metrics server", "err", err) ··· 368 368 } 369 369 370 370 // Check for empty queue 371 + state.wg.Add(1) 371 372 go func() { 372 - state.wg.Add(1) 373 373 defer state.wg.Done() 374 374 t := time.NewTicker(30 * time.Second) 375 375 for {
+2 -2
cmd/palomar/Dockerfile
··· 3 3 # podman build -f ./cmd/palomar/Dockerfile -t palomar . 4 4 5 5 ### Compile stage 6 - FROM golang:1.22-alpine3.19 AS build-env 6 + FROM golang:1.23-alpine3.20 AS build-env 7 7 RUN apk add --no-cache build-base make git 8 8 9 9 ADD . /dockerbuild ··· 15 15 go build -tags timetzdata -o /palomar ./cmd/palomar 16 16 17 17 ### Run stage 18 - FROM alpine:3.19 18 + FROM alpine:3.20 19 19 20 20 RUN apk add --no-cache --update dumb-init ca-certificates 21 21 ENTRYPOINT ["dumb-init", "--"]
+43
cmd/rainbow/Dockerfile
··· 1 + FROM golang:1.23-bullseye AS build-env 2 + 3 + ENV DEBIAN_FRONTEND=noninteractive 4 + ENV TZ=Etc/UTC 5 + ENV GODEBUG="netdns=go" 6 + ENV GOOS="linux" 7 + ENV GOARCH="amd64" 8 + ENV CGO_ENABLED="1" 9 + 10 + WORKDIR /usr/src/rainbow 11 + 12 + COPY . . 13 + 14 + RUN go mod download && \ 15 + go mod verify 16 + 17 + RUN go build \ 18 + -v \ 19 + -trimpath \ 20 + -tags timetzdata \ 21 + -o /rainbow-bin \ 22 + ./cmd/rainbow 23 + 24 + FROM debian:bullseye-slim 25 + 26 + ENV DEBIAN_FRONTEND="noninteractive" 27 + ENV TZ=Etc/UTC 28 + ENV GODEBUG="netdns=go" 29 + 30 + RUN apt-get update && apt-get install --yes \ 31 + dumb-init \ 32 + ca-certificates \ 33 + runit 34 + 35 + WORKDIR /rainbow 36 + COPY --from=build-env /rainbow-bin /usr/bin/rainbow 37 + 38 + ENTRYPOINT ["/usr/bin/dumb-init", "--"] 39 + CMD ["/usr/bin/rainbow"] 40 + 41 + LABEL org.opencontainers.image.source=https://github.com/bluesky-social/indigo 42 + LABEL org.opencontainers.image.description="rainbow atproto firehose fanout service" 43 + LABEL org.opencontainers.image.licenses=MIT
+32
cmd/rainbow/README.md
···
1 + 
2 + `rainbow`: atproto Firehose Fanout Service
3 + ==========================================
4 + 
5 + This is an atproto service which consumes from a firehose (eg, from a relay or PDS) and fans out events to many subscribers.
6 + 
7 + Features and design points:
8 + 
9 + - retains "backfill window" on local disk (using [pebble](https://github.com/cockroachdb/pebble))
10 + - serves the `com.atproto.sync.subscribeRepos` endpoint (WebSocket)
11 + - retains upstream firehose "sequence numbers"
12 + - does not validate events (signatures, repo tree, hashes, etc), just passes through
13 + - does not archive or mirror individual records or entire repositories (or implement related API endpoints)
14 + - disk I/O intensive: fast NVMe disks are recommended, and RAM is helpful for caching
15 + - single golang binary for easy deployment
16 + - observability: logging, prometheus metrics, OTEL traces
17 + 
18 + ## Running
19 + 
20 + This is a simple, single-binary Go program. You can also build and run it as a docker container (see `./Dockerfile`).
21 + 
22 + From the top level of this repo, you can build:
23 + 
24 + ```shell
25 + go build -o rainbow-bin ./cmd/rainbow
26 + ```
27 + 
28 + or just run it, and see configuration options:
29 + 
30 + ```shell
31 + go run ./cmd/rainbow --help
32 + ```
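As a rough sketch of what a downstream subscriber looks like, the example below dials rainbow's `com.atproto.sync.subscribeRepos` WebSocket and logs commit events using the callback and scheduler helpers that the other consumers in this change already use. The host and port (`localhost:2480`, the default `--api-listen` value) and the handler body are illustrative assumptions, not part of this change.

```go
package main

import (
	"context"
	"log/slog"
	"net/http"
	"os"

	comatproto "github.com/bluesky-social/indigo/api/atproto"
	"github.com/bluesky-social/indigo/events"
	"github.com/bluesky-social/indigo/events/schedulers/sequential"

	"github.com/gorilla/websocket"
)

func main() {
	ctx := context.Background()

	// assumes a local rainbow instance on the default api-listen port (:2480)
	uri := "ws://localhost:2480/xrpc/com.atproto.sync.subscribeRepos"
	con, _, err := websocket.DefaultDialer.Dial(uri, http.Header{})
	if err != nil {
		slog.Error("dial failed", "err", err)
		os.Exit(1)
	}

	// only the commit callback is set here; other event types are ignored
	rsc := &events.RepoStreamCallbacks{
		RepoCommit: func(evt *comatproto.SyncSubscribeRepos_Commit) error {
			slog.Info("commit", "repo", evt.Repo, "seq", evt.Seq, "ops", len(evt.Ops))
			return nil
		},
	}
	sched := sequential.NewScheduler("rainbow-consumer", rsc.EventHandler)

	// nil logger, matching the HandleRepoStream call sites elsewhere in this diff
	if err := events.HandleRepoStream(ctx, con, sched, nil); err != nil {
		slog.Error("stream ended", "err", err)
	}
}
```

Adding a `cursor` query parameter to the subscribe URL should let a consumer resume from a sequence number that is still inside the backfill window described above.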
+218
cmd/rainbow/main.go
··· 1 + package main 2 + 3 + import ( 4 + "context" 5 + "log/slog" 6 + _ "net/http/pprof" 7 + "os" 8 + "os/signal" 9 + "syscall" 10 + "time" 11 + 12 + "github.com/bluesky-social/indigo/events" 13 + "github.com/bluesky-social/indigo/splitter" 14 + 15 + "github.com/carlmjohnson/versioninfo" 16 + _ "github.com/joho/godotenv/autoload" 17 + "github.com/urfave/cli/v2" 18 + "go.opentelemetry.io/otel" 19 + "go.opentelemetry.io/otel/attribute" 20 + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" 21 + "go.opentelemetry.io/otel/sdk/resource" 22 + tracesdk "go.opentelemetry.io/otel/sdk/trace" 23 + semconv "go.opentelemetry.io/otel/semconv/v1.4.0" 24 + _ "go.uber.org/automaxprocs" 25 + ) 26 + 27 + var log = slog.Default().With("system", "rainbow") 28 + 29 + func init() { 30 + // control log level using, eg, GOLOG_LOG_LEVEL=debug 31 + //logging.SetAllLoggers(logging.LevelDebug) 32 + } 33 + 34 + func main() { 35 + run(os.Args) 36 + } 37 + 38 + func run(args []string) { 39 + app := cli.App{ 40 + Name: "rainbow", 41 + Usage: "atproto firehose fan-out daemon", 42 + Version: versioninfo.Short(), 43 + } 44 + 45 + app.Flags = []cli.Flag{ 46 + // TODO: unimplemented, always assumes https:// and wss:// 47 + //&cli.BoolFlag{ 48 + // Name: "crawl-insecure-ws", 49 + // Usage: "when connecting to PDS instances, use ws:// instead of wss://", 50 + // EnvVars: []string{"RAINBOW_INSECURE_CRAWL"}, 51 + //}, 52 + &cli.StringFlag{ 53 + Name: "splitter-host", 54 + Value: "bsky.network", 55 + EnvVars: []string{"ATP_RELAY_HOST", "RAINBOW_RELAY_HOST"}, 56 + }, 57 + &cli.StringFlag{ 58 + Name: "persist-db", 59 + Value: "./rainbow.db", 60 + Usage: "path to persistence db", 61 + EnvVars: []string{"RAINBOW_DB_PATH"}, 62 + }, 63 + &cli.StringFlag{ 64 + Name: "cursor-file", 65 + Value: "./rainbow-cursor", 66 + Usage: "write upstream cursor number to this file", 67 + EnvVars: []string{"RAINBOW_CURSOR_PATH"}, 68 + }, 69 + &cli.StringFlag{ 70 + Name: "api-listen", 71 + Value: ":2480", 72 + EnvVars: []string{"RAINBOW_API_LISTEN"}, 73 + }, 74 + &cli.StringFlag{ 75 + Name: "metrics-listen", 76 + Value: ":2481", 77 + EnvVars: []string{"RAINBOW_METRICS_LISTEN", "SPLITTER_METRICS_LISTEN"}, 78 + }, 79 + &cli.Float64Flag{ 80 + Name: "persist-hours", 81 + Value: 24 * 3, 82 + EnvVars: []string{"RAINBOW_PERSIST_HOURS", "SPLITTER_PERSIST_HOURS"}, 83 + Usage: "hours to buffer (float, may be fractional)", 84 + }, 85 + &cli.Int64Flag{ 86 + Name: "persist-bytes", 87 + Value: 0, 88 + Usage: "max bytes target for event cache, 0 to disable size target trimming", 89 + EnvVars: []string{"RAINBOW_PERSIST_BYTES", "SPLITTER_PERSIST_BYTES"}, 90 + }, 91 + &cli.StringSliceFlag{ 92 + Name: "next-crawler", 93 + Usage: "forward POST requestCrawl to this url, should be machine root url and not xrpc/requestCrawl, comma separated list", 94 + EnvVars: []string{"RELAY_NEXT_CRAWLER"}, 95 + }, 96 + } 97 + 98 + // TODO: slog.SetDefault and set module `var log *slog.Logger` based on flags and env 99 + 100 + app.Action = Splitter 101 + err := app.Run(os.Args) 102 + if err != nil { 103 + log.Error(err.Error()) 104 + os.Exit(1) 105 + } 106 + } 107 + 108 + func Splitter(cctx *cli.Context) error { 109 + // Trap SIGINT to trigger a shutdown. 
110 + signals := make(chan os.Signal, 1) 111 + signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM) 112 + 113 + // Enable OTLP HTTP exporter 114 + // For relevant environment variables: 115 + // https://pkg.go.dev/go.opentelemetry.io/otel/exporters/otlp/otlptrace#readme-environment-variables 116 + // At a minimum, you need to set 117 + // OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 118 + if ep := os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT"); ep != "" { 119 + log.Info("setting up trace exporter", "endpoint", ep) 120 + ctx, cancel := context.WithCancel(context.Background()) 121 + defer cancel() 122 + 123 + exp, err := otlptracehttp.New(ctx) 124 + if err != nil { 125 + log.Error("failed to create trace exporter", "error", err) 126 + os.Exit(1) 127 + } 128 + defer func() { 129 + ctx, cancel := context.WithTimeout(context.Background(), time.Second) 130 + defer cancel() 131 + if err := exp.Shutdown(ctx); err != nil { 132 + log.Error("failed to shutdown trace exporter", "error", err) 133 + } 134 + }() 135 + 136 + tp := tracesdk.NewTracerProvider( 137 + tracesdk.WithBatcher(exp), 138 + tracesdk.WithResource(resource.NewWithAttributes( 139 + semconv.SchemaURL, 140 + semconv.ServiceNameKey.String("splitter"), 141 + attribute.String("env", os.Getenv("ENVIRONMENT")), // DataDog 142 + attribute.String("environment", os.Getenv("ENVIRONMENT")), // Others 143 + attribute.Int64("ID", 1), 144 + )), 145 + ) 146 + otel.SetTracerProvider(tp) 147 + } 148 + 149 + persistPath := cctx.String("persist-db") 150 + upstreamHost := cctx.String("splitter-host") 151 + nextCrawlers := cctx.StringSlice("next-crawler") 152 + 153 + var spl *splitter.Splitter 154 + var err error 155 + if persistPath != "" { 156 + log.Info("building splitter with storage at", "path", persistPath) 157 + ppopts := events.PebblePersistOptions{ 158 + DbPath: persistPath, 159 + PersistDuration: time.Duration(float64(time.Hour) * cctx.Float64("persist-hours")), 160 + GCPeriod: 5 * time.Minute, 161 + MaxBytes: uint64(cctx.Int64("persist-bytes")), 162 + } 163 + conf := splitter.SplitterConfig{ 164 + UpstreamHost: upstreamHost, 165 + CursorFile: cctx.String("cursor-file"), 166 + PebbleOptions: &ppopts, 167 + } 168 + spl, err = splitter.NewSplitter(conf, nextCrawlers) 169 + } else { 170 + log.Info("building in-memory splitter") 171 + conf := splitter.SplitterConfig{ 172 + UpstreamHost: upstreamHost, 173 + CursorFile: cctx.String("cursor-file"), 174 + } 175 + spl, err = splitter.NewSplitter(conf, nextCrawlers) 176 + } 177 + if err != nil { 178 + log.Error("failed to create splitter", "path", persistPath, "error", err) 179 + os.Exit(1) 180 + return err 181 + } 182 + 183 + // set up metrics endpoint 184 + go func() { 185 + if err := spl.StartMetrics(cctx.String("metrics-listen")); err != nil { 186 + log.Error("failed to start metrics endpoint", "err", err) 187 + os.Exit(1) 188 + } 189 + }() 190 + 191 + runErr := make(chan error, 1) 192 + 193 + go func() { 194 + err := spl.Start(cctx.String("api-listen")) 195 + runErr <- err 196 + }() 197 + 198 + log.Info("startup complete") 199 + select { 200 + case <-signals: 201 + log.Info("received shutdown signal") 202 + if err := spl.Shutdown(); err != nil { 203 + log.Error("error during Splitter shutdown", "err", err) 204 + } 205 + case err := <-runErr: 206 + if err != nil { 207 + log.Error("error during Splitter startup", "err", err) 208 + } 209 + log.Info("shutting down") 210 + if err := spl.Shutdown(); err != nil { 211 + log.Error("error during Splitter shutdown", "err", err) 212 + } 213 + } 214 + 215 + 
log.Info("shutdown complete") 216 + 217 + return nil 218 + }
+1 -1
cmd/sonar/Dockerfile
··· 1 1 # Stage 1: Build the Go binary 2 - FROM golang:1.22-alpine3.19 AS builder 2 + FROM golang:1.23-alpine3.20 AS builder 3 3 4 4 # Install SSL ca certificates. 5 5 RUN apk update && apk add --no-cache ca-certificates && update-ca-certificates
+5 -5
cmd/sonar/main.go
··· 104 104 pool := sequential.NewScheduler(u.Host, s.HandleStreamEvent) 105 105 106 106 // Start a goroutine to manage the cursor file, saving the current cursor every 5 seconds. 107 + wg.Add(1) 107 108 go func() { 108 - wg.Add(1) 109 109 defer wg.Done() 110 110 ticker := time.NewTicker(5 * time.Second) 111 111 logger := logger.With("source", "cursor_file_manager") ··· 130 130 }() 131 131 132 132 // Start a goroutine to manage the liveness checker, shutting down if no events are received for 15 seconds 133 + wg.Add(1) 133 134 go func() { 134 - wg.Add(1) 135 135 defer wg.Done() 136 136 ticker := time.NewTicker(15 * time.Second) 137 137 lastSeq := int64(0) ··· 167 167 } 168 168 169 169 // Startup metrics server 170 + wg.Add(1) 170 171 go func() { 171 - wg.Add(1) 172 172 defer wg.Done() 173 173 logger = logger.With("source", "metrics_server") 174 174 ··· 194 194 } 195 195 defer c.Close() 196 196 197 + wg.Add(1) 197 198 go func() { 198 - wg.Add(1) 199 199 defer wg.Done() 200 - err = events.HandleRepoStream(ctx, c, pool) 200 + err = events.HandleRepoStream(ctx, c, pool, logger) 201 201 logger.Info("HandleRepoStream returned unexpectedly", "err", err) 202 202 cancel() 203 203 }()
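The `wg.Add(1)` calls move out of the goroutines because the WaitGroup counter must be incremented before the goroutine is scheduled; otherwise a concurrent `wg.Wait()` can observe a zero counter and return while work is still starting. The general shape of the fix, as a fragment:

```go
// correct: increment the counter before spawning the goroutine,
// and decrement it inside the goroutine once the work is done
wg.Add(1)
go func() {
	defer wg.Done()
	// ... work ...
}()
```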
-3
cmd/stress/main.go
··· 26 26 _ "github.com/joho/godotenv/autoload" 27 27 28 28 "github.com/carlmjohnson/versioninfo" 29 - logging "github.com/ipfs/go-log" 30 29 "github.com/ipld/go-car" 31 30 cli "github.com/urfave/cli/v2" 32 31 ) 33 - 34 - var log = logging.Logger("stress") 35 32 36 33 func main() { 37 34 run(os.Args)
+1 -1
cmd/supercollider/Dockerfile
··· 1 1 # Stage 1: Build the Go binary 2 - FROM golang:1.22-alpine3.18 AS builder 2 + FROM golang:1.23-alpine3.20 AS builder 3 3 4 4 # Create a directory for the application 5 5 WORKDIR /app
+1 -1
cmd/supercollider/main.go
··· 565 565 return nil, nil, err 566 566 } 567 567 568 - cs, err := carstore.NewCarStore(cardb, cspath) 568 + cs, err := carstore.NewCarStore(cardb, []string{cspath}) 569 569 if err != nil { 570 570 return nil, nil, err 571 571 }
+6
did/metrics.go
··· 9 9 Name: "multiresolver_resolved_dids_total", 10 10 Help: "Total number of DIDs resolved", 11 11 }, []string{"resolver"}) 12 + 13 + var mrResolveDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{ 14 + Name: "indigo_multiresolver_resolve_duration_seconds", 15 + Help: "A histogram of resolve latencies", 16 + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), 17 + }, []string{"resolver"})
+6
did/multi.go
··· 3 3 import ( 4 4 "context" 5 5 "fmt" 6 + "time" 6 7 7 8 "github.com/whyrusleeping/go-did" 8 9 ) ··· 43 44 } 44 45 45 46 func (mr *MultiResolver) GetDocument(ctx context.Context, didstr string) (*did.Document, error) { 47 + s := time.Now() 48 + 46 49 pdid, err := did.ParseDID(didstr) 47 50 if err != nil { 48 51 return nil, err 49 52 } 50 53 51 54 method := pdid.Protocol() 55 + defer func() { 56 + mrResolveDuration.WithLabelValues(method).Observe(time.Since(s).Seconds()) 57 + }() 52 58 53 59 res, ok := mr.handlers[method] 54 60 if !ok {
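The deferred closure means every return path of `GetDocument` is timed, including early error returns. The same pattern in isolation, using a hypothetical `timedResolve` helper (the helper name is not part of the codebase):

```go
// timedResolve is a hypothetical helper showing the same defer-based
// timing pattern used in GetDocument above.
func timedResolve(method string, resolve func() error) error {
	start := time.Now()
	defer func() {
		mrResolveDuration.WithLabelValues(method).Observe(time.Since(start).Seconds())
	}()
	return resolve()
}
```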
+18 -10
events/consumer.go
··· 4 4 "context" 5 5 "fmt" 6 6 "io" 7 + "log/slog" 7 8 "net" 8 9 "time" 9 10 ··· 108 109 return n, err 109 110 } 110 111 111 - func HandleRepoStream(ctx context.Context, con *websocket.Conn, sched Scheduler) error { 112 + // HandleRepoStream 113 + // con is source of events 114 + // sched gets AddWork for each event 115 + // log may be nil for default logger 116 + func HandleRepoStream(ctx context.Context, con *websocket.Conn, sched Scheduler, log *slog.Logger) error { 117 + if log == nil { 118 + log = slog.Default().With("system", "events") 119 + } 112 120 ctx, cancel := context.WithCancel(ctx) 113 121 defer cancel() 114 122 defer sched.Shutdown() ··· 124 132 select { 125 133 case <-t.C: 126 134 if err := con.WriteControl(websocket.PingMessage, []byte{}, time.Now().Add(time.Second*10)); err != nil { 127 - log.Warnf("failed to ping: %s", err) 135 + log.Warn("failed to ping", "err", err) 128 136 } 129 137 case <-ctx.Done(): 130 138 con.Close() ··· 145 153 146 154 con.SetPongHandler(func(_ string) error { 147 155 if err := con.SetReadDeadline(time.Now().Add(time.Minute)); err != nil { 148 - log.Errorf("failed to set read deadline: %s", err) 156 + log.Error("failed to set read deadline", "err", err) 149 157 } 150 158 151 159 return nil ··· 194 202 } 195 203 196 204 if evt.Seq < lastSeq { 197 - log.Errorf("Got events out of order from stream (seq = %d, prev = %d)", evt.Seq, lastSeq) 205 + log.Error("Got events out of order from stream", "seq", evt.Seq, "prev", lastSeq) 198 206 } 199 207 200 208 lastSeq = evt.Seq ··· 211 219 } 212 220 213 221 if evt.Seq < lastSeq { 214 - log.Errorf("Got events out of order from stream (seq = %d, prev = %d)", evt.Seq, lastSeq) 222 + log.Error("Got events out of order from stream", "seq", evt.Seq, "prev", lastSeq) 215 223 } 216 224 lastSeq = evt.Seq 217 225 ··· 227 235 } 228 236 229 237 if evt.Seq < lastSeq { 230 - log.Errorf("Got events out of order from stream (seq = %d, prev = %d)", evt.Seq, lastSeq) 238 + log.Error("Got events out of order from stream", "seq", evt.Seq, "prev", lastSeq) 231 239 } 232 240 lastSeq = evt.Seq 233 241 ··· 243 251 } 244 252 245 253 if evt.Seq < lastSeq { 246 - log.Errorf("Got events out of order from stream (seq = %d, prev = %d)", evt.Seq, lastSeq) 254 + log.Error("Got events out of order from stream", "seq", evt.Seq, "prev", lastSeq) 247 255 } 248 256 lastSeq = evt.Seq 249 257 ··· 271 279 } 272 280 273 281 if evt.Seq < lastSeq { 274 - log.Errorf("Got events out of order from stream (seq = %d, prev = %d)", evt.Seq, lastSeq) 282 + log.Error("Got events out of order from stream", "seq", evt.Seq, "prev", lastSeq) 275 283 } 276 284 lastSeq = evt.Seq 277 285 ··· 287 295 } 288 296 289 297 if evt.Seq < lastSeq { 290 - log.Errorf("Got events out of order from stream (seq = %d, prev = %d)", evt.Seq, lastSeq) 298 + log.Error("Got events out of order from stream", "seq", evt.Seq, "prev", lastSeq) 291 299 } 292 300 lastSeq = evt.Seq 293 301 ··· 303 311 } 304 312 305 313 if evt.Seq < lastSeq { 306 - log.Errorf("Got events out of order from stream (seq = %d, prev = %d)", evt.Seq, lastSeq) 314 + log.Error("Got events out of order from stream", "seq", evt.Seq, "prev", lastSeq) 307 315 } 308 316 309 317 lastSeq = evt.Seq
+2 -2
events/dbpersist.go
··· 131 131 132 132 if needsFlush { 133 133 if err := p.Flush(context.Background()); err != nil { 134 - log.Errorf("failed to flush batch: %s", err) 134 + log.Error("failed to flush batch", "err", err) 135 135 } 136 136 } 137 137 } ··· 323 323 func (p *DbPersistence) RecordFromRepoCommit(ctx context.Context, evt *comatproto.SyncSubscribeRepos_Commit) (*RepoEventRecord, error) { 324 324 // TODO: hack hack hack 325 325 if len(evt.Ops) > 8192 { 326 - log.Errorf("(VERY BAD) truncating ops field in outgoing event (len = %d)", len(evt.Ops)) 326 + log.Error("(VERY BAD) truncating ops field in outgoing event", "len", len(evt.Ops)) 327 327 evt.Ops = evt.Ops[:8192] 328 328 } 329 329
+13 -19
events/dbpersist_test.go
··· 1 - package events_test 1 + package events 2 2 3 3 import ( 4 4 "context" ··· 11 11 atproto "github.com/bluesky-social/indigo/api/atproto" 12 12 "github.com/bluesky-social/indigo/api/bsky" 13 13 "github.com/bluesky-social/indigo/carstore" 14 - "github.com/bluesky-social/indigo/events" 15 14 lexutil "github.com/bluesky-social/indigo/lex/util" 16 15 "github.com/bluesky-social/indigo/models" 17 - "github.com/bluesky-social/indigo/pds" 16 + pds "github.com/bluesky-social/indigo/pds/data" 18 17 "github.com/bluesky-social/indigo/repomgr" 19 18 "github.com/bluesky-social/indigo/util" 20 - "github.com/ipfs/go-log/v2" 21 19 "gorm.io/driver/sqlite" 22 20 "gorm.io/gorm" 23 21 ) 24 22 25 - func init() { 26 - log.SetAllLoggers(log.LevelDebug) 27 - } 28 - 29 23 func BenchmarkDBPersist(b *testing.B) { 30 24 ctx := context.Background() 31 25 ··· 61 55 defer os.RemoveAll(tempPath) 62 56 63 57 // Initialize a DBPersister 64 - dbp, err := events.NewDbPersistence(db, cs, nil) 58 + dbp, err := NewDbPersistence(db, cs, nil) 65 59 if err != nil { 66 60 b.Fatal(err) 67 61 } 68 62 69 63 // Create a bunch of events 70 - evtman := events.NewEventManager(dbp) 64 + evtman := NewEventManager(dbp) 71 65 72 66 userRepoHead, err := mgr.GetRepoRoot(ctx, 1) 73 67 if err != nil { 74 68 b.Fatal(err) 75 69 } 76 70 77 - inEvts := make([]*events.XRPCStreamEvent, b.N) 71 + inEvts := make([]*XRPCStreamEvent, b.N) 78 72 for i := 0; i < b.N; i++ { 79 73 cidLink := lexutil.LexLink(cid) 80 74 headLink := lexutil.LexLink(userRepoHead) 81 - inEvts[i] = &events.XRPCStreamEvent{ 75 + inEvts[i] = &XRPCStreamEvent{ 82 76 RepoCommit: &atproto.SyncSubscribeRepos_Commit{ 83 77 Repo: "did:example:123", 84 78 Commit: headLink, ··· 136 130 137 131 b.StopTimer() 138 132 139 - dbp.Playback(ctx, 0, func(evt *events.XRPCStreamEvent) error { 133 + dbp.Playback(ctx, 0, func(evt *XRPCStreamEvent) error { 140 134 outEvtCount++ 141 135 return nil 142 136 }) ··· 183 177 defer os.RemoveAll(tempPath) 184 178 185 179 // Initialize a DBPersister 186 - dbp, err := events.NewDbPersistence(db, cs, nil) 180 + dbp, err := NewDbPersistence(db, cs, nil) 187 181 if err != nil { 188 182 b.Fatal(err) 189 183 } 190 184 191 185 // Create a bunch of events 192 - evtman := events.NewEventManager(dbp) 186 + evtman := NewEventManager(dbp) 193 187 194 188 userRepoHead, err := mgr.GetRepoRoot(ctx, 1) 195 189 if err != nil { 196 190 b.Fatal(err) 197 191 } 198 192 199 - inEvts := make([]*events.XRPCStreamEvent, n) 193 + inEvts := make([]*XRPCStreamEvent, n) 200 194 for i := 0; i < n; i++ { 201 195 cidLink := lexutil.LexLink(cid) 202 196 headLink := lexutil.LexLink(userRepoHead) 203 - inEvts[i] = &events.XRPCStreamEvent{ 197 + inEvts[i] = &XRPCStreamEvent{ 204 198 RepoCommit: &atproto.SyncSubscribeRepos_Commit{ 205 199 Repo: "did:example:123", 206 200 Commit: headLink, ··· 256 250 257 251 b.ResetTimer() 258 252 259 - dbp.Playback(ctx, 0, func(evt *events.XRPCStreamEvent) error { 253 + dbp.Playback(ctx, 0, func(evt *XRPCStreamEvent) error { 260 254 outEvtCount++ 261 255 return nil 262 256 }) ··· 301 295 return nil, nil, nil, "", err 302 296 } 303 297 304 - cs, err := carstore.NewCarStore(cardb, cspath) 298 + cs, err := carstore.NewCarStore(cardb, []string{cspath}) 305 299 if err != nil { 306 300 return nil, nil, nil, "", err 307 301 }
+5 -5
events/diskpersist.go
··· 312 312 dp.lk.Lock() 313 313 if err := dp.flushLog(ctx); err != nil { 314 314 // TODO: this happening is quite bad. Need a recovery strategy 315 - log.Errorf("failed to flush disk log: %s", err) 315 + log.Error("failed to flush disk log", "err", err) 316 316 } 317 317 dp.lk.Unlock() 318 318 } ··· 354 354 case <-t.C: 355 355 if errs := dp.garbageCollect(ctx); len(errs) > 0 { 356 356 for _, err := range errs { 357 - log.Errorf("garbage collection error: %s", err) 357 + log.Error("garbage collection error", "err", err) 358 358 } 359 359 } 360 360 } ··· 430 430 refsGarbageCollected.WithLabelValues().Add(float64(refsDeleted)) 431 431 filesGarbageCollected.WithLabelValues().Add(float64(filesDeleted)) 432 432 433 - log.Infow("garbage collection complete", 433 + log.Info("garbage collection complete", 434 434 "filesDeleted", filesDeleted, 435 435 "refsDeleted", refsDeleted, 436 436 "oldRefsFound", oldRefsFound, ··· 696 696 return nil, err 697 697 } 698 698 if since > lastSeq { 699 - log.Errorw("playback cursor is greater than last seq of file checked", 699 + log.Error("playback cursor is greater than last seq of file checked", 700 700 "since", since, 701 701 "lastSeq", lastSeq, 702 702 "filename", fn, ··· 778 778 return nil, err 779 779 } 780 780 default: 781 - log.Warnw("unrecognized event kind coming from log file", "seq", h.Seq, "kind", h.Kind) 781 + log.Warn("unrecognized event kind coming from log file", "seq", h.Seq, "kind", h.Kind) 782 782 return nil, fmt.Errorf("halting on unrecognized event kind") 783 783 } 784 784 }
+41 -36
events/diskpersist_test.go
··· 1 - package events_test 1 + package events 2 2 3 3 import ( 4 4 "context" ··· 14 14 atproto "github.com/bluesky-social/indigo/api/atproto" 15 15 "github.com/bluesky-social/indigo/api/bsky" 16 16 "github.com/bluesky-social/indigo/carstore" 17 - "github.com/bluesky-social/indigo/events" 18 17 lexutil "github.com/bluesky-social/indigo/lex/util" 19 18 "github.com/bluesky-social/indigo/models" 20 - "github.com/bluesky-social/indigo/pds" 19 + pds "github.com/bluesky-social/indigo/pds/data" 21 20 "github.com/bluesky-social/indigo/repomgr" 22 21 "github.com/bluesky-social/indigo/util" 23 22 "gorm.io/gorm" 24 23 ) 25 24 26 - func TestDiskPersist(t *testing.T) { 25 + func testPersister(t *testing.T, perisistenceFactory func(path string, db *gorm.DB) (EventPersistence, error)) { 27 26 ctx := context.Background() 28 27 29 28 db, _, cs, tempPath, err := setupDBs(t) ··· 57 56 58 57 defer os.RemoveAll(tempPath) 59 58 60 - // Initialize a DBPersister 61 - 62 - dp, err := events.NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &events.DiskPersistOptions{ 63 - EventsPerFile: 10, 64 - UIDCacheSize: 100000, 65 - DIDCacheSize: 100000, 66 - }) 59 + // Initialize a persister 60 + dp, err := perisistenceFactory(tempPath, db) 67 61 if err != nil { 68 62 t.Fatal(err) 69 63 } 70 64 71 65 // Create a bunch of events 72 - evtman := events.NewEventManager(dp) 66 + evtman := NewEventManager(dp) 73 67 74 68 userRepoHead, err := mgr.GetRepoRoot(ctx, 1) 75 69 if err != nil { ··· 77 71 } 78 72 79 73 n := 100 80 - inEvts := make([]*events.XRPCStreamEvent, n) 74 + inEvts := make([]*XRPCStreamEvent, n) 81 75 for i := 0; i < n; i++ { 82 76 cidLink := lexutil.LexLink(cid) 83 77 headLink := lexutil.LexLink(userRepoHead) 84 - inEvts[i] = &events.XRPCStreamEvent{ 78 + inEvts[i] = &XRPCStreamEvent{ 85 79 RepoCommit: &atproto.SyncSubscribeRepos_Commit{ 86 80 Repo: "did:example:123", 87 81 Commit: headLink, ··· 93 87 }, 94 88 }, 95 89 Time: time.Now().Format(util.ISO8601), 90 + Seq: int64(i), 96 91 }, 97 92 } 98 93 } ··· 112 107 outEvtCount := 0 113 108 expectedEvtCount := n 114 109 115 - dp.Playback(ctx, 0, func(evt *events.XRPCStreamEvent) error { 110 + dp.Playback(ctx, 0, func(evt *XRPCStreamEvent) error { 116 111 outEvtCount++ 117 112 return nil 118 113 }) ··· 125 120 126 121 time.Sleep(time.Millisecond * 100) 127 122 128 - dp2, err := events.NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &events.DiskPersistOptions{ 123 + dp2, err := NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &DiskPersistOptions{ 129 124 EventsPerFile: 10, 130 125 UIDCacheSize: 100000, 131 126 DIDCacheSize: 100000, ··· 134 129 t.Fatal(err) 135 130 } 136 131 137 - evtman2 := events.NewEventManager(dp2) 132 + evtman2 := NewEventManager(dp2) 138 133 139 - inEvts = make([]*events.XRPCStreamEvent, n) 134 + inEvts = make([]*XRPCStreamEvent, n) 140 135 for i := 0; i < n; i++ { 141 136 cidLink := lexutil.LexLink(cid) 142 137 headLink := lexutil.LexLink(userRepoHead) 143 - inEvts[i] = &events.XRPCStreamEvent{ 138 + inEvts[i] = &XRPCStreamEvent{ 144 139 RepoCommit: &atproto.SyncSubscribeRepos_Commit{ 145 140 Repo: "did:example:123", 146 141 Commit: headLink, ··· 163 158 } 164 159 } 165 160 } 161 + func TestDiskPersist(t *testing.T) { 162 + factory := func(tempPath string, db *gorm.DB) (EventPersistence, error) { 163 + return NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), 
filepath.Join(tempPath, "diskArchive"), db, &DiskPersistOptions{ 164 + EventsPerFile: 10, 165 + UIDCacheSize: 100000, 166 + DIDCacheSize: 100000, 167 + }) 168 + } 169 + testPersister(t, factory) 170 + } 166 171 167 172 func BenchmarkDiskPersist(b *testing.B) { 168 173 db, _, cs, tempPath, err := setupDBs(b) ··· 174 179 175 180 // Initialize a DBPersister 176 181 177 - dp, err := events.NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &events.DiskPersistOptions{ 182 + dp, err := NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &DiskPersistOptions{ 178 183 EventsPerFile: 5000, 179 184 UIDCacheSize: 100000, 180 185 DIDCacheSize: 100000, ··· 187 192 188 193 } 189 194 190 - func runPersisterBenchmark(b *testing.B, cs carstore.CarStore, db *gorm.DB, p events.EventPersistence) { 195 + func runPersisterBenchmark(b *testing.B, cs carstore.CarStore, db *gorm.DB, p EventPersistence) { 191 196 ctx := context.Background() 192 197 193 198 db.AutoMigrate(&pds.User{}) ··· 215 220 } 216 221 217 222 // Create a bunch of events 218 - evtman := events.NewEventManager(p) 223 + evtman := NewEventManager(p) 219 224 220 225 userRepoHead, err := mgr.GetRepoRoot(ctx, 1) 221 226 if err != nil { 222 227 b.Fatal(err) 223 228 } 224 229 225 - inEvts := make([]*events.XRPCStreamEvent, b.N) 230 + inEvts := make([]*XRPCStreamEvent, b.N) 226 231 for i := 0; i < b.N; i++ { 227 232 cidLink := lexutil.LexLink(cid) 228 233 headLink := lexutil.LexLink(userRepoHead) 229 - inEvts[i] = &events.XRPCStreamEvent{ 234 + inEvts[i] = &XRPCStreamEvent{ 230 235 RepoCommit: &atproto.SyncSubscribeRepos_Commit{ 231 236 Repo: "did:example:123", 232 237 Commit: headLink, ··· 290 295 291 296 // Initialize a DBPersister 292 297 293 - dp, err := events.NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &events.DiskPersistOptions{ 298 + dp, err := NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &DiskPersistOptions{ 294 299 EventsPerFile: 20, 295 300 UIDCacheSize: 100000, 296 301 DIDCacheSize: 100000, ··· 302 307 runEventManagerTest(t, cs, db, dp) 303 308 } 304 309 305 - func runEventManagerTest(t *testing.T, cs carstore.CarStore, db *gorm.DB, p events.EventPersistence) { 310 + func runEventManagerTest(t *testing.T, cs carstore.CarStore, db *gorm.DB, p EventPersistence) { 306 311 ctx := context.Background() 307 312 308 313 db.AutoMigrate(&pds.User{}) ··· 329 334 t.Fatal(err) 330 335 } 331 336 332 - evtman := events.NewEventManager(p) 337 + evtman := NewEventManager(p) 333 338 334 339 userRepoHead, err := mgr.GetRepoRoot(ctx, 1) 335 340 if err != nil { ··· 337 342 } 338 343 339 344 testSize := 100 // you can adjust this number as needed 340 - inEvts := make([]*events.XRPCStreamEvent, testSize) 345 + inEvts := make([]*XRPCStreamEvent, testSize) 341 346 for i := 0; i < testSize; i++ { 342 347 cidLink := lexutil.LexLink(cid) 343 348 headLink := lexutil.LexLink(userRepoHead) 344 - inEvts[i] = &events.XRPCStreamEvent{ 349 + inEvts[i] = &XRPCStreamEvent{ 345 350 RepoCommit: &atproto.SyncSubscribeRepos_Commit{ 346 351 Repo: "did:example:123", 347 352 Commit: headLink, ··· 368 373 } 369 374 370 375 outEvtCount := 0 371 - p.Playback(ctx, 0, func(evt *events.XRPCStreamEvent) error { 376 + p.Playback(ctx, 0, func(evt *XRPCStreamEvent) error { 372 377 // Check that the contents of the output events match the input events 373 378 // Clear cache, don't 
care if one has it and not the other 374 379 inEvts[outEvtCount].Preserialized = nil ··· 397 402 398 403 // Initialize a DBPersister 399 404 400 - dp, err := events.NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &events.DiskPersistOptions{ 405 + dp, err := NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &DiskPersistOptions{ 401 406 EventsPerFile: 10, 402 407 UIDCacheSize: 100000, 403 408 DIDCacheSize: 100000, ··· 409 414 runTakedownTest(t, cs, db, dp) 410 415 } 411 416 412 - func runTakedownTest(t *testing.T, cs carstore.CarStore, db *gorm.DB, p events.EventPersistence) { 417 + func runTakedownTest(t *testing.T, cs carstore.CarStore, db *gorm.DB, p EventPersistence) { 413 418 ctx := context.TODO() 414 419 415 420 db.AutoMigrate(&pds.User{}) ··· 439 444 } 440 445 } 441 446 442 - evtman := events.NewEventManager(p) 447 + evtman := NewEventManager(p) 443 448 444 449 testSize := 100 // you can adjust this number as needed 445 - inEvts := make([]*events.XRPCStreamEvent, testSize*userCount) 450 + inEvts := make([]*XRPCStreamEvent, testSize*userCount) 446 451 for i := 0; i < testSize*userCount; i++ { 447 452 user := users[i%userCount] 448 453 _, cid, err := mgr.CreateRecord(ctx, user.Uid, "app.bsky.feed.post", &bsky.FeedPost{ ··· 460 465 461 466 cidLink := lexutil.LexLink(cid) 462 467 headLink := lexutil.LexLink(userRepoHead) 463 - inEvts[i] = &events.XRPCStreamEvent{ 468 + inEvts[i] = &XRPCStreamEvent{ 464 469 RepoCommit: &atproto.SyncSubscribeRepos_Commit{ 465 470 Repo: user.Did, 466 471 Commit: headLink, ··· 495 500 496 501 // Verify that the events of the user have been removed from the event stream 497 502 var evtsCount int 498 - if err := p.Playback(ctx, 0, func(evt *events.XRPCStreamEvent) error { 503 + if err := p.Playback(ctx, 0, func(evt *XRPCStreamEvent) error { 499 504 evtsCount++ 500 505 if evt.RepoCommit.Repo == takeDownUser.Did { 501 506 t.Fatalf("found event for user %d after takedown", takeDownUser.Uid)
+94 -13
events/events.go
··· 6 6 "errors" 7 7 "fmt" 8 8 "io" 9 + "log/slog" 9 10 "sync" 10 11 "time" 11 12 ··· 14 15 "github.com/bluesky-social/indigo/models" 15 16 "github.com/prometheus/client_golang/prometheus" 16 17 17 - logging "github.com/ipfs/go-log" 18 18 cbg "github.com/whyrusleeping/cbor-gen" 19 19 "go.opentelemetry.io/otel" 20 20 ) 21 21 22 - var log = logging.Logger("events") 22 + var log = slog.Default().With("system", "events") 23 23 24 24 type Scheduler interface { 25 25 AddWork(ctx context.Context, repo string, val *XRPCStreamEvent) error ··· 34 34 crossoverBufferSize int 35 35 36 36 persister EventPersistence 37 + 38 + log *slog.Logger 37 39 } 38 40 39 41 func NewEventManager(persister EventPersistence) *EventManager { ··· 41 43 bufferSize: 16 << 10, 42 44 crossoverBufferSize: 512, 43 45 persister: persister, 46 + log: slog.Default().With("system", "events"), 44 47 } 45 48 46 49 persister.SetEventBroadcaster(em.broadcastEvent) ··· 67 70 func (em *EventManager) broadcastEvent(evt *XRPCStreamEvent) { 68 71 // the main thing we do is send it out, so MarshalCBOR once 69 72 if err := evt.Preserialize(); err != nil { 70 - log.Errorf("broadcast serialize failed, %s", err) 73 + em.log.Error("broadcast serialize failed", "err", err) 71 74 // serialize isn't going to go better later, this event is cursed 72 75 return 73 76 } ··· 93 96 // code 94 97 s.filter = func(*XRPCStreamEvent) bool { return false } 95 98 96 - log.Warnw("dropping slow consumer due to event overflow", "bufferSize", len(s.outgoing), "ident", s.ident) 99 + em.log.Warn("dropping slow consumer due to event overflow", "bufferSize", len(s.outgoing), "ident", s.ident) 97 100 go func(torem *Subscriber) { 98 101 torem.lk.Lock() 99 102 if !torem.cleanedUp { ··· 104 107 }, 105 108 }: 106 109 case <-time.After(time.Second * 5): 107 - log.Warnw("failed to send error frame to backed up consumer", "ident", torem.ident) 110 + em.log.Warn("failed to send error frame to backed up consumer", "ident", torem.ident) 108 111 } 109 112 } 110 113 torem.lk.Unlock() ··· 121 124 // accept a uid. The lookup inside the persister is notably expensive (despite 122 125 // being an lru cache?) 
123 126 if err := em.persister.Persist(ctx, evt); err != nil { 124 - log.Errorf("failed to persist outbound event: %s", err) 127 + em.log.Error("failed to persist outbound event", "err", err) 125 128 } 126 129 } 127 130 ··· 219 222 return obj.MarshalCBOR(cborWriter) 220 223 } 221 224 225 + func (xevt *XRPCStreamEvent) Deserialize(r io.Reader) error { 226 + var header EventHeader 227 + if err := header.UnmarshalCBOR(r); err != nil { 228 + return fmt.Errorf("reading header: %w", err) 229 + } 230 + switch header.Op { 231 + case EvtKindMessage: 232 + switch header.MsgType { 233 + case "#commit": 234 + var evt comatproto.SyncSubscribeRepos_Commit 235 + if err := evt.UnmarshalCBOR(r); err != nil { 236 + return fmt.Errorf("reading repoCommit event: %w", err) 237 + } 238 + xevt.RepoCommit = &evt 239 + case "#handle": 240 + var evt comatproto.SyncSubscribeRepos_Handle 241 + if err := evt.UnmarshalCBOR(r); err != nil { 242 + return err 243 + } 244 + xevt.RepoHandle = &evt 245 + case "#identity": 246 + var evt comatproto.SyncSubscribeRepos_Identity 247 + if err := evt.UnmarshalCBOR(r); err != nil { 248 + return err 249 + } 250 + xevt.RepoIdentity = &evt 251 + case "#account": 252 + var evt comatproto.SyncSubscribeRepos_Account 253 + if err := evt.UnmarshalCBOR(r); err != nil { 254 + return err 255 + } 256 + xevt.RepoAccount = &evt 257 + case "#info": 258 + // TODO: this might also be a LabelInfo (as opposed to RepoInfo) 259 + var evt comatproto.SyncSubscribeRepos_Info 260 + if err := evt.UnmarshalCBOR(r); err != nil { 261 + return err 262 + } 263 + xevt.RepoInfo = &evt 264 + case "#migrate": 265 + var evt comatproto.SyncSubscribeRepos_Migrate 266 + if err := evt.UnmarshalCBOR(r); err != nil { 267 + return err 268 + } 269 + xevt.RepoMigrate = &evt 270 + case "#tombstone": 271 + var evt comatproto.SyncSubscribeRepos_Tombstone 272 + if err := evt.UnmarshalCBOR(r); err != nil { 273 + return err 274 + } 275 + xevt.RepoTombstone = &evt 276 + case "#labels": 277 + var evt comatproto.LabelSubscribeLabels_Labels 278 + if err := evt.UnmarshalCBOR(r); err != nil { 279 + return fmt.Errorf("reading Labels event: %w", err) 280 + } 281 + xevt.LabelLabels = &evt 282 + } 283 + case EvtKindErrorFrame: 284 + var errframe ErrorFrame 285 + if err := errframe.UnmarshalCBOR(r); err != nil { 286 + return err 287 + } 288 + xevt.Error = &errframe 289 + default: 290 + return fmt.Errorf("unrecognized event stream type: %d", header.Op) 291 + } 292 + return nil 293 + } 294 + 295 + var ErrNoSeq = errors.New("event has no sequence number") 296 + 222 297 // serialize content into Preserialized cache 223 298 func (evt *XRPCStreamEvent) Preserialize() error { 224 299 if evt.Preserialized != nil { ··· 290 365 case <-done: 291 366 return ErrPlaybackShutdown 292 367 case out <- e: 293 - seq := sequenceForEvent(e) 368 + seq := SequenceForEvent(e) 294 369 if seq > 0 { 295 370 lastSeq = seq 296 371 } ··· 298 373 } 299 374 }); err != nil { 300 375 if errors.Is(err, ErrPlaybackShutdown) { 301 - log.Warnf("events playback: %s", err) 376 + em.log.Warn("events playback", "err", err) 302 377 } else { 303 - log.Errorf("events playback: %s", err) 378 + em.log.Error("events playback", "err", err) 304 379 } 305 380 306 381 // TODO: send an error frame or something? 
··· 315 390 316 391 // run playback again to get us to the events that have started buffering 317 392 if err := em.persister.Playback(ctx, lastSeq, func(e *XRPCStreamEvent) error { 318 - seq := sequenceForEvent(e) 319 - if seq > sequenceForEvent(first) { 393 + seq := SequenceForEvent(e) 394 + if seq > SequenceForEvent(first) { 320 395 return ErrCaughtUp 321 396 } 322 397 ··· 328 403 } 329 404 }); err != nil { 330 405 if !errors.Is(err, ErrCaughtUp) { 331 - log.Errorf("events playback: %s", err) 406 + em.log.Error("events playback", "err", err) 332 407 333 408 // TODO: send an error frame or something? 334 409 close(out) ··· 351 426 return out, sub.cleanup, nil 352 427 } 353 428 354 - func sequenceForEvent(evt *XRPCStreamEvent) int64 { 429 + func SequenceForEvent(evt *XRPCStreamEvent) int64 { 430 + return evt.Sequence() 431 + } 432 + 433 + func (evt *XRPCStreamEvent) Sequence() int64 { 355 434 switch { 356 435 case evt == nil: 357 436 return -1 ··· 365 444 return evt.RepoTombstone.Seq 366 445 case evt.RepoIdentity != nil: 367 446 return evt.RepoIdentity.Seq 447 + case evt.RepoAccount != nil: 448 + return evt.RepoAccount.Seq 368 449 case evt.RepoInfo != nil: 369 450 return -1 370 451 case evt.Error != nil:
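The exported `Sequence()` method (and the `SequenceForEvent` wrapper kept for existing callers) gives consumers one way to read a cursor off any event variant, returning -1 when an event carries no sequence number. A small illustrative helper built on it:

```go
// makeCursorHandler is an illustrative helper: it records the highest
// sequence seen and skips events that carry none (Sequence() returns -1).
func makeCursorHandler(lastSeq *int64) func(context.Context, *events.XRPCStreamEvent) error {
	return func(ctx context.Context, evt *events.XRPCStreamEvent) error {
		if seq := evt.Sequence(); seq > *lastSeq {
			*lastSeq = seq
		}
		return nil
	}
}
```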
+262
events/pebblepersist.go
··· 1 + package events 2 + 3 + import ( 4 + "bytes" 5 + "context" 6 + "encoding/binary" 7 + "encoding/hex" 8 + "errors" 9 + "fmt" 10 + "time" 11 + 12 + "github.com/bluesky-social/indigo/models" 13 + "github.com/cockroachdb/pebble" 14 + ) 15 + 16 + type PebblePersist struct { 17 + broadcast func(*XRPCStreamEvent) 18 + db *pebble.DB 19 + 20 + prevSeq int64 21 + prevSeqExtra uint32 22 + 23 + cancel func() 24 + 25 + options PebblePersistOptions 26 + } 27 + 28 + type PebblePersistOptions struct { 29 + // path where pebble will create a directory full of files 30 + DbPath string 31 + 32 + // Throw away posts older than some time ago 33 + PersistDuration time.Duration 34 + 35 + // Throw away old posts every so often 36 + GCPeriod time.Duration 37 + 38 + // MaxBytes is what we _try_ to keep disk usage under 39 + MaxBytes uint64 40 + } 41 + 42 + var DefaultPebblePersistOptions = PebblePersistOptions{ 43 + PersistDuration: time.Minute * 20, 44 + GCPeriod: time.Minute * 5, 45 + MaxBytes: 1024 * 1024 * 1024, // 1 GiB 46 + } 47 + 48 + // Create a new EventPersistence which stores data in pebbledb 49 + // nil opts is ok 50 + func NewPebblePersistance(opts *PebblePersistOptions) (*PebblePersist, error) { 51 + if opts == nil { 52 + opts = &DefaultPebblePersistOptions 53 + } 54 + db, err := pebble.Open(opts.DbPath, &pebble.Options{}) 55 + if err != nil { 56 + return nil, fmt.Errorf("%s: %w", opts.DbPath, err) 57 + } 58 + pp := new(PebblePersist) 59 + pp.options = *opts 60 + pp.db = db 61 + return pp, nil 62 + } 63 + 64 + func setKeySeqMillis(key []byte, seq, millis int64) { 65 + binary.BigEndian.PutUint64(key[:8], uint64(seq)) 66 + binary.BigEndian.PutUint64(key[8:16], uint64(millis)) 67 + } 68 + 69 + func (pp *PebblePersist) Persist(ctx context.Context, e *XRPCStreamEvent) error { 70 + err := e.Preserialize() 71 + if err != nil { 72 + return err 73 + } 74 + blob := e.Preserialized 75 + 76 + seq := e.Sequence() 77 + nowMillis := time.Now().UnixMilli() 78 + 79 + if seq < 0 { 80 + // persist with longer key {prev 8 byte key}{time}{int32 extra counter} 81 + pp.prevSeqExtra++ 82 + var key [20]byte 83 + setKeySeqMillis(key[:], seq, nowMillis) 84 + binary.BigEndian.PutUint32(key[16:], pp.prevSeqExtra) 85 + 86 + err = pp.db.Set(key[:], blob, pebble.Sync) 87 + } else { 88 + pp.prevSeq = seq 89 + pp.prevSeqExtra = 0 90 + var key [16]byte 91 + setKeySeqMillis(key[:], seq, nowMillis) 92 + 93 + err = pp.db.Set(key[:], blob, pebble.Sync) 94 + } 95 + 96 + if err != nil { 97 + return err 98 + } 99 + pp.broadcast(e) 100 + 101 + return err 102 + } 103 + 104 + func eventFromPebbleIter(iter *pebble.Iterator) (*XRPCStreamEvent, error) { 105 + blob, err := iter.ValueAndErr() 106 + if err != nil { 107 + return nil, err 108 + } 109 + br := bytes.NewReader(blob) 110 + evt := new(XRPCStreamEvent) 111 + err = evt.Deserialize(br) 112 + if err != nil { 113 + return nil, err 114 + } 115 + evt.Preserialized = bytes.Clone(blob) 116 + return evt, nil 117 + } 118 + 119 + func (pp *PebblePersist) Playback(ctx context.Context, since int64, cb func(*XRPCStreamEvent) error) error { 120 + var key [8]byte 121 + binary.BigEndian.PutUint64(key[:], uint64(since)) 122 + 123 + iter, err := pp.db.NewIterWithContext(ctx, &pebble.IterOptions{LowerBound: key[:]}) 124 + if err != nil { 125 + return err 126 + } 127 + defer iter.Close() 128 + 129 + for iter.First(); iter.Valid(); iter.Next() { 130 + evt, err := eventFromPebbleIter(iter) 131 + if err != nil { 132 + return err 133 + } 134 + 135 + err = cb(evt) 136 + if err != nil { 137 + return err 138 + } 
139 + } 140 + 141 + return nil 142 + } 143 + func (pp *PebblePersist) TakeDownRepo(ctx context.Context, usr models.Uid) error { 144 + // TODO: implement filter on playback to ignore taken-down-repos? 145 + return nil 146 + } 147 + func (pp *PebblePersist) Flush(context.Context) error { 148 + return pp.db.Flush() 149 + } 150 + func (pp *PebblePersist) Shutdown(context.Context) error { 151 + if pp.cancel != nil { 152 + pp.cancel() 153 + } 154 + err := pp.db.Close() 155 + pp.db = nil 156 + return err 157 + } 158 + 159 + func (pp *PebblePersist) SetEventBroadcaster(broadcast func(*XRPCStreamEvent)) { 160 + pp.broadcast = broadcast 161 + } 162 + 163 + var ErrNoLast = errors.New("no last event") 164 + 165 + func (pp *PebblePersist) GetLast(ctx context.Context) (seq, millis int64, evt *XRPCStreamEvent, err error) { 166 + iter, err := pp.db.NewIterWithContext(ctx, &pebble.IterOptions{}) 167 + if err != nil { 168 + return 0, 0, nil, err 169 + } 170 + ok := iter.Last() 171 + if !ok { 172 + return 0, 0, nil, ErrNoLast 173 + } 174 + evt, err = eventFromPebbleIter(iter) 175 + keyblob := iter.Key() 176 + seq = int64(binary.BigEndian.Uint64(keyblob[:8])) 177 + millis = int64(binary.BigEndian.Uint64(keyblob[8:16])) 178 + return seq, millis, evt, nil 179 + } 180 + 181 + // example; 182 + // ``` 183 + // pp := NewPebblePersistance("/tmp/foo.pebble") 184 + // go pp.GCThread(context.Background(), 48 * time.Hour, 5 * time.Minute) 185 + // ``` 186 + func (pp *PebblePersist) GCThread(ctx context.Context) { 187 + ctx, cancel := context.WithCancel(ctx) 188 + pp.cancel = cancel 189 + ticker := time.NewTicker(pp.options.GCPeriod) 190 + defer ticker.Stop() 191 + for { 192 + select { 193 + case <-ticker.C: 194 + err := pp.GarbageCollect(ctx) 195 + if err != nil { 196 + log.Error("GC err", "err", err) 197 + } 198 + case <-ctx.Done(): 199 + return 200 + } 201 + } 202 + } 203 + 204 + var zeroKey [16]byte 205 + var ffffKey [16]byte 206 + 207 + func init() { 208 + setKeySeqMillis(zeroKey[:], 0, 0) 209 + for i := range ffffKey { 210 + ffffKey[i] = 0xff 211 + } 212 + } 213 + 214 + func (pp *PebblePersist) GarbageCollect(ctx context.Context) error { 215 + nowMillis := time.Now().UnixMilli() 216 + expired := nowMillis - pp.options.PersistDuration.Milliseconds() 217 + iter, err := pp.db.NewIterWithContext(ctx, &pebble.IterOptions{}) 218 + if err != nil { 219 + return err 220 + } 221 + defer iter.Close() 222 + // scan keys to find last expired, then delete range 223 + var seq int64 = int64(-1) 224 + var lastKeyTime int64 225 + for iter.First(); iter.Valid(); iter.Next() { 226 + keyblob := iter.Key() 227 + 228 + keyTime := int64(binary.BigEndian.Uint64(keyblob[8:16])) 229 + if keyTime <= expired { 230 + lastKeyTime = keyTime 231 + seq = int64(binary.BigEndian.Uint64(keyblob[:8])) 232 + } else { 233 + break 234 + } 235 + } 236 + 237 + // TODO: use pp.options.MaxBytes 238 + 239 + sizeBefore, _ := pp.db.EstimateDiskUsage(zeroKey[:], ffffKey[:]) 240 + if seq == -1 { 241 + // nothing to delete 242 + log.Info("pebble gc nop", "size", sizeBefore) 243 + return nil 244 + } 245 + var key [16]byte 246 + setKeySeqMillis(key[:], seq, lastKeyTime) 247 + log.Info("pebble gc start", "to", hex.EncodeToString(key[:])) 248 + err = pp.db.DeleteRange(zeroKey[:], key[:], pebble.Sync) 249 + if err != nil { 250 + return err 251 + } 252 + sizeAfter, _ := pp.db.EstimateDiskUsage(zeroKey[:], ffffKey[:]) 253 + log.Info("pebble gc", "before", sizeBefore, "after", sizeAfter) 254 + start := time.Now() 255 + err = pp.db.Compact(zeroKey[:], key[:], true) 256 + 
if err != nil { 257 + log.Warn("pebble gc compact", "err", err) 258 + } 259 + dt := time.Since(start) 260 + log.Info("pebble gc compact ok", "dt", dt) 261 + return nil 262 + }
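As a usage sketch, wiring the pebble persister into an event manager might look like the following; the helper name, path, and retention are illustrative, and garbage collection only runs if `GCThread` is started explicitly.

```go
// newPebbleEventManager is a hypothetical helper, not part of this change.
func newPebbleEventManager(dbPath string) (*events.EventManager, *events.PebblePersist, error) {
	opts := events.DefaultPebblePersistOptions
	opts.DbPath = dbPath
	opts.PersistDuration = 72 * time.Hour // keep roughly three days of events

	pp, err := events.NewPebblePersistance(&opts)
	if err != nil {
		return nil, nil, err
	}
	// expired events are only trimmed while the GC loop is running
	go pp.GCThread(context.Background())

	return events.NewEventManager(pp), pp, nil
}
```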
+16
events/pebblepersist_test.go
··· 1 + package events 2 + 3 + import ( 4 + "gorm.io/gorm" 5 + "path/filepath" 6 + "testing" 7 + ) 8 + 9 + func TestPebblePersist(t *testing.T) { 10 + factory := func(tempPath string, db *gorm.DB) (EventPersistence, error) { 11 + opts := DefaultPebblePersistOptions 12 + opts.DbPath = filepath.Join(tempPath, "pebble.db") 13 + return NewPebblePersistance(&opts) 14 + } 15 + testPersister(t, factory) 16 + }
+14 -12
events/schedulers/autoscaling/autoscaling.go
··· 2 2 3 3 import ( 4 4 "context" 5 + "log/slog" 5 6 "sync" 6 7 "time" 7 8 8 9 "github.com/bluesky-social/indigo/events" 9 10 "github.com/bluesky-social/indigo/events/schedulers" 10 - logging "github.com/ipfs/go-log" 11 11 "github.com/prometheus/client_golang/prometheus" 12 12 ) 13 - 14 - var log = logging.Logger("autoscaling-scheduler") 15 13 16 14 // Scheduler is a scheduler that will scale up and down the number of workers based on the throughput of the workers. 17 15 type Scheduler struct { ··· 40 38 autoscaleFrequency time.Duration 41 39 autoscalerIn chan struct{} 42 40 autoscalerOut chan struct{} 41 + 42 + log *slog.Logger 43 43 } 44 44 45 45 type AutoscaleSettings struct { ··· 99 99 autoscaleFrequency: autoscaleSettings.AutoscaleFrequency, 100 100 autoscalerIn: make(chan struct{}), 101 101 autoscalerOut: make(chan struct{}), 102 + 103 + log: slog.Default().With("system", "autoscaling-scheduler"), 102 104 } 103 105 104 106 for i := 0; i < p.concurrency; i++ { ··· 111 113 } 112 114 113 115 func (p *Scheduler) Shutdown() { 114 - log.Debugf("shutting down autoscaling scheduler for %s", p.ident) 116 + p.log.Debug("shutting down autoscaling scheduler", "ident", p.ident) 115 117 116 118 // stop autoscaling 117 119 p.autoscalerIn <- struct{}{} 118 120 close(p.autoscalerIn) 119 121 <-p.autoscalerOut 120 122 121 - log.Debug("stopping autoscaling scheduler workers") 123 + p.log.Debug("stopping autoscaling scheduler workers") 122 124 // stop workers 123 125 for i := 0; i < p.concurrency; i++ { 124 126 p.feeder <- &consumerTask{signal: "stop"} 125 127 } 126 128 close(p.feeder) 127 129 128 - log.Debug("waiting for autoscaling scheduler workers to stop") 130 + p.log.Debug("waiting for autoscaling scheduler workers to stop") 129 131 130 132 p.workerGroup.Wait() 131 133 132 - log.Debug("stopping autoscaling scheduler throughput manager") 134 + p.log.Debug("stopping autoscaling scheduler throughput manager") 133 135 p.throughputManager.Stop() 134 136 135 - log.Debug("autoscaling scheduler shutdown complete") 137 + p.log.Debug("autoscaling scheduler shutdown complete") 136 138 } 137 139 138 140 // Add autoscaling function ··· 197 199 } 198 200 199 201 func (p *Scheduler) worker() { 200 - log.Debugf("starting autoscaling worker for %s", p.ident) 202 + p.log.Debug("starting autoscaling worker", "ident", p.ident) 201 203 p.workersActive.Inc() 202 204 p.workerGroup.Add(1) 203 205 defer p.workerGroup.Done() ··· 205 207 for work != nil { 206 208 // Check if the work item contains a signal to stop the worker. 207 209 if work.signal == "stop" { 208 - log.Debugf("stopping autoscaling worker for %s", p.ident) 210 + p.log.Debug("stopping autoscaling worker", "ident", p.ident) 209 211 p.workersActive.Dec() 210 212 return 211 213 } 212 214 213 215 p.itemsActive.Inc() 214 216 if err := p.do(context.TODO(), work.val); err != nil { 215 - log.Errorf("event handler failed: %s", err) 217 + p.log.Error("event handler failed", "err", err) 216 218 } 217 219 p.itemsProcessed.Inc() 218 220 219 221 p.lk.Lock() 220 222 rem, ok := p.active[work.repo] 221 223 if !ok { 222 - log.Errorf("should always have an 'active' entry if a worker is processing a job") 224 + p.log.Error("should always have an 'active' entry if a worker is processing a job") 223 225 } 224 226 225 227 if len(rem) == 0 {
+9 -7
events/schedulers/parallel/parallel.go
··· 2 2 3 3 import ( 4 4 "context" 5 + "log/slog" 5 6 "sync" 6 7 7 8 "github.com/bluesky-social/indigo/events" 8 9 "github.com/bluesky-social/indigo/events/schedulers" 9 - logging "github.com/ipfs/go-log" 10 10 11 11 "github.com/prometheus/client_golang/prometheus" 12 12 ) 13 - 14 - var log = logging.Logger("parallel-scheduler") 15 13 16 14 // Scheduler is a parallel scheduler that will run work on a fixed number of workers 17 15 type Scheduler struct { ··· 33 31 itemsProcessed prometheus.Counter 34 32 itemsActive prometheus.Counter 35 33 workesActive prometheus.Gauge 34 + 35 + log *slog.Logger 36 36 } 37 37 38 38 func NewScheduler(maxC, maxQ int, ident string, do func(context.Context, *events.XRPCStreamEvent) error) *Scheduler { ··· 52 52 itemsProcessed: schedulers.WorkItemsProcessed.WithLabelValues(ident, "parallel"), 53 53 itemsActive: schedulers.WorkItemsActive.WithLabelValues(ident, "parallel"), 54 54 workesActive: schedulers.WorkersActive.WithLabelValues(ident, "parallel"), 55 + 56 + log: slog.Default().With("system", "parallel-scheduler"), 55 57 } 56 58 57 59 for i := 0; i < maxC; i++ { ··· 64 66 } 65 67 66 68 func (p *Scheduler) Shutdown() { 67 - log.Infof("shutting down parallel scheduler for %s", p.ident) 69 + p.log.Info("shutting down parallel scheduler", "ident", p.ident) 68 70 69 71 for i := 0; i < p.maxConcurrency; i++ { 70 72 p.feeder <- &consumerTask{ ··· 78 80 <-p.out 79 81 } 80 82 81 - log.Info("parallel scheduler shutdown complete") 83 + p.log.Info("parallel scheduler shutdown complete") 82 84 } 83 85 84 86 type consumerTask struct { ··· 123 125 124 126 p.itemsActive.Inc() 125 127 if err := p.do(context.TODO(), work.val); err != nil { 126 - log.Errorf("event handler failed: %s", err) 128 + p.log.Error("event handler failed", "err", err) 127 129 } 128 130 p.itemsProcessed.Inc() 129 131 130 132 p.lk.Lock() 131 133 rem, ok := p.active[work.repo] 132 134 if !ok { 133 - log.Errorf("should always have an 'active' entry if a worker is processing a job") 135 + p.log.Error("should always have an 'active' entry if a worker is processing a job") 134 136 } 135 137 136 138 if len(rem) == 0 {
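For comparison with the sequential scheduler used by sonar above, constructing a parallel scheduler for the same handler signature might look like this sketch; the concurrency and queue-depth values are illustrative.

```go
// 8 workers, feeder queue of 1024 items (illustrative values)
sched := parallel.NewScheduler(8, 1024, "my-consumer", func(ctx context.Context, evt *events.XRPCStreamEvent) error {
	// per-event work goes here
	return nil
})
defer sched.Shutdown()
```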
+1 -3
events/schedulers/sequential/sequential.go
··· 2 2 3 3 import ( 4 4 "context" 5 - 6 5 "github.com/bluesky-social/indigo/events" 7 6 "github.com/bluesky-social/indigo/events/schedulers" 8 - logging "github.com/ipfs/go-log" 9 7 "github.com/prometheus/client_golang/prometheus" 10 8 ) 11 9 12 - var log = logging.Logger("sequential-scheduler") 10 + // var log = slog.Default().With("system", "sequential-scheduler") 13 11 14 12 // Scheduler is a sequential scheduler that will run work on a single worker 15 13 type Scheduler struct {
+3 -7
fakedata/accounts.go
··· 20 20 21 21 func (ac *AccountCatalog) Combined() []AccountContext { 22 22 var combined []AccountContext 23 - for _, c := range ac.Celebs { 24 - combined = append(combined, c) 25 - } 26 - for _, r := range ac.Regulars { 27 - combined = append(combined, r) 28 - } 23 + combined = append(combined, ac.Celebs...) 24 + combined = append(combined, ac.Regulars...) 29 25 return combined 30 26 } 31 27 ··· 72 68 return nil, fmt.Errorf("account index didn't match: %d != %d (%s)", i, u.Index, u.AccountType) 73 69 } 74 70 } 75 - log.Infof("loaded account catalog: regular=%d celebrity=%d", len(catalog.Regulars), len(catalog.Celebs)) 71 + log.Info("loaded account catalog", "regular", len(catalog.Regulars), "celebrity", len(catalog.Celebs)) 76 72 return catalog, nil 77 73 } 78 74
+8 -4
fakedata/generators.go
··· 7 7 "bytes" 8 8 "context" 9 9 "fmt" 10 + "log/slog" 10 11 "math/rand" 11 12 "time" 12 13 ··· 16 17 "github.com/bluesky-social/indigo/xrpc" 17 18 18 19 "github.com/brianvoe/gofakeit/v6" 19 - logging "github.com/ipfs/go-log" 20 20 ) 21 21 22 - var log = logging.Logger("fakedata") 22 + var log = slog.Default().With("system", "fakedata") 23 + 24 + func SetLogger(logger *slog.Logger) { 25 + log = logger 26 + } 23 27 24 28 func MeasureIterations(name string) func(int) { 25 29 start := time.Now() ··· 28 32 return 29 33 } 30 34 total := time.Since(start) 31 - log.Infof("%s wall runtime: count=%d total=%s mean=%s", name, count, total, total/time.Duration(count)) 35 + log.Info("wall runtime", "name", name, "count", count, "total", total, "rate", total/time.Duration(count)) 32 36 } 33 37 } 34 38 ··· 386 390 func BrowseAccount(xrpcc *xrpc.Client, acc *AccountContext) error { 387 391 // fetch notifications 388 392 maxNotif := 50 389 - resp, err := appbsky.NotificationListNotifications(context.TODO(), xrpcc, "", int64(maxNotif), false, "") 393 + resp, err := appbsky.NotificationListNotifications(context.TODO(), xrpcc, "", int64(maxNotif), false, nil, "") 390 394 if err != nil { 391 395 return err 392 396 }
+21 -10
go.mod
··· 1 1 module github.com/bluesky-social/indigo 2 2 3 - go 1.22 3 + go 1.23 4 4 5 5 require ( 6 6 contrib.go.opencensus.io/exporter/prometheus v0.4.2 ··· 8 8 github.com/RussellLuo/slidingwindow v0.0.0-20200528002341-535bb99d338b 9 9 github.com/adrg/xdg v0.5.0 10 10 github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de 11 + github.com/bradfitz/gomemcache v0.0.0-20230905024940-24af94b03874 11 12 github.com/brianvoe/gofakeit/v6 v6.25.0 12 13 github.com/carlmjohnson/versioninfo v0.22.5 14 + github.com/cockroachdb/pebble v1.1.2 13 15 github.com/dustinkirkland/golang-petname v0.0.0-20231002161417-6a283f1aaaf2 14 16 github.com/flosch/pongo2/v6 v6.0.0 15 17 github.com/go-redis/cache/v9 v9.0.0 16 18 github.com/goccy/go-json v0.10.2 17 - github.com/gocql/gocql v1.7.0 18 19 github.com/golang-jwt/jwt v3.2.2+incompatible 19 20 github.com/gorilla/websocket v1.5.1 20 21 github.com/hashicorp/go-retryablehttp v0.7.5 ··· 30 31 github.com/ipfs/go-ipld-cbor v0.1.0 31 32 github.com/ipfs/go-ipld-format v0.6.0 32 33 github.com/ipfs/go-libipfs v0.7.0 33 - github.com/ipfs/go-log v1.0.5 34 34 github.com/ipfs/go-log/v2 v2.5.1 35 35 github.com/ipld/go-car v0.6.1-0.20230509095817-92d28eb23ba4 36 36 github.com/ipld/go-car/v2 v2.13.1 ··· 56 56 github.com/urfave/cli/v2 v2.25.7 57 57 github.com/whyrusleeping/cbor-gen v0.2.1-0.20241030202151-b7a6831be65e 58 58 github.com/whyrusleeping/go-did v0.0.0-20230824162731-404d1707d5d6 59 + github.com/xlab/treeprint v1.2.0 59 60 gitlab.com/yawning/secp256k1-voi v0.0.0-20230925100816-f2616030848b 60 61 go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.1 61 62 go.opentelemetry.io/otel v1.21.0 ··· 65 66 go.opentelemetry.io/otel/sdk v1.21.0 66 67 go.opentelemetry.io/otel/trace v1.21.0 67 68 go.uber.org/automaxprocs v1.5.3 69 + go.uber.org/zap v1.26.0 68 70 golang.org/x/crypto v0.21.0 69 - golang.org/x/sync v0.5.0 71 + golang.org/x/sync v0.7.0 70 72 golang.org/x/text v0.14.0 71 73 golang.org/x/time v0.3.0 72 74 golang.org/x/tools v0.15.0 ··· 78 80 ) 79 81 80 82 require ( 83 + github.com/DataDog/zstd v1.4.5 // indirect 84 + github.com/cockroachdb/errors v1.11.3 // indirect 85 + github.com/cockroachdb/fifo v0.0.0-20240606204812-0bbfbd93a7ce // indirect 86 + github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b // indirect 87 + github.com/cockroachdb/redact v1.1.5 // indirect 88 + github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 // indirect 81 89 github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect 90 + github.com/getsentry/sentry-go v0.27.0 // indirect 82 91 github.com/go-redis/redis v6.15.9+incompatible // indirect 83 - github.com/golang/snappy v0.0.3 // indirect 84 - github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed // indirect 92 + github.com/golang/snappy v0.0.4 // indirect 85 93 github.com/hashicorp/golang-lru v1.0.2 // indirect 94 + github.com/ipfs/go-log v1.0.5 // indirect 86 95 github.com/jackc/puddle/v2 v2.2.1 // indirect 87 96 github.com/klauspost/compress v1.17.3 // indirect 97 + github.com/kr/pretty v0.3.1 // indirect 98 + github.com/kr/text v0.2.0 // indirect 88 99 github.com/labstack/gommon v0.4.1 // indirect 89 100 github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect 90 101 github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 // indirect 102 + github.com/pkg/errors v0.9.1 // indirect 103 + github.com/rogpeppe/go-internal v1.10.0 // indirect 91 104 github.com/vmihailenco/go-tinylfu v0.2.2 // indirect 92 105 github.com/vmihailenco/msgpack/v5 v5.4.1 
// indirect 93 106 github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect 94 107 github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 // indirect 95 - go.uber.org/zap v1.26.0 // indirect 96 108 golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa // indirect 97 - gopkg.in/inf.v0 v0.9.1 // indirect 98 109 ) 99 110 100 111 require ( ··· 167 178 go.uber.org/atomic v1.11.0 // indirect 168 179 go.uber.org/multierr v1.11.0 // indirect 169 180 golang.org/x/mod v0.14.0 // indirect 170 - golang.org/x/net v0.21.0 // indirect 181 + golang.org/x/net v0.23.0 // indirect 171 182 golang.org/x/sys v0.22.0 // indirect 172 183 google.golang.org/genproto/googleapis/api v0.0.0-20231120223509-83a465c0220f // indirect 173 184 google.golang.org/genproto/googleapis/rpc v0.0.0-20231120223509-83a465c0220f // indirect 174 185 google.golang.org/grpc v1.59.0 // indirect 175 - google.golang.org/protobuf v1.31.0 // indirect 186 + google.golang.org/protobuf v1.33.0 // indirect 176 187 gopkg.in/yaml.v2 v2.4.0 // indirect 177 188 gopkg.in/yaml.v3 v3.0.1 // indirect 178 189 lukechampine.com/blake3 v1.2.1 // indirect
+37 -18
go.sum
··· 35 35 dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= 36 36 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= 37 37 github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= 38 + github.com/DataDog/zstd v1.4.5 h1:EndNeuB0l9syBZhut0wns3gV1hL8zX8LIu6ZiVHWLIQ= 39 + github.com/DataDog/zstd v1.4.5/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo= 38 40 github.com/PuerkitoBio/purell v1.2.1 h1:QsZ4TjvwiMpat6gBCBxEQI0rcS9ehtkKtSpiUnd9N28= 39 41 github.com/PuerkitoBio/purell v1.2.1/go.mod h1:ZwHcC/82TOaovDi//J/804umJFFmbOHPngi8iYYv/Eo= 40 42 github.com/RussellLuo/slidingwindow v0.0.0-20200528002341-535bb99d338b h1:5/++qT1/z812ZqBvqQt6ToRswSuPZ/B33m6xVHRzADU= ··· 71 73 github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= 72 74 github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= 73 75 github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= 74 - github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932 h1:mXoPYz/Ul5HYEDvkta6I8/rnYM5gSdSV2tJ6XbZuEtY= 75 - github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932/go.mod h1:NOuUCSz6Q9T7+igc/hlvDOUdtWKryOrtFyIVABv/p7k= 76 - github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY= 77 - github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= 76 + github.com/bradfitz/gomemcache v0.0.0-20230905024940-24af94b03874 h1:N7oVaKyGp8bttX0bfZGmcGkjz7DLQXhAn3DNd3T0ous= 77 + github.com/bradfitz/gomemcache v0.0.0-20230905024940-24af94b03874/go.mod h1:r5xuitiExdLAJ09PR7vBVENGvp4ZuTBeWTGtxuX3K+c= 78 78 github.com/brianvoe/gofakeit/v6 v6.25.0 h1:ZpFjktOpLZUeF8q223o0rUuXtA+m5qW5srjvVi+JkXk= 79 79 github.com/brianvoe/gofakeit/v6 v6.25.0/go.mod h1:Xj58BMSnFqcn/fAQeSK+/PLtC5kSb7FJIq4JyGa8vEs= 80 80 github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= ··· 95 95 github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= 96 96 github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= 97 97 github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= 98 + github.com/cockroachdb/datadriven v1.0.3-0.20230413201302-be42291fc80f h1:otljaYPt5hWxV3MUfO5dFPFiOXg9CyG5/kCfayTqsJ4= 99 + github.com/cockroachdb/datadriven v1.0.3-0.20230413201302-be42291fc80f/go.mod h1:a9RdTaap04u637JoCzcUoIcDmvwSUtcUFtT/C3kJlTU= 100 + github.com/cockroachdb/errors v1.11.3 h1:5bA+k2Y6r+oz/6Z/RFlNeVCesGARKuC6YymtcDrbC/I= 101 + github.com/cockroachdb/errors v1.11.3/go.mod h1:m4UIW4CDjx+R5cybPsNrRbreomiFqt8o1h1wUVazSd8= 102 + github.com/cockroachdb/fifo v0.0.0-20240606204812-0bbfbd93a7ce h1:giXvy4KSc/6g/esnpM7Geqxka4WSqI1SZc7sMJFd3y4= 103 + github.com/cockroachdb/fifo v0.0.0-20240606204812-0bbfbd93a7ce/go.mod h1:9/y3cnZ5GKakj/H4y9r9GTjCvAFta7KLgSHPJJYc52M= 104 + github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b h1:r6VH0faHjZeQy818SGhaone5OnYfxFR/+AzdY3sf5aE= 105 + github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b/go.mod h1:Vz9DsVWQQhf3vs21MhPMZpMGSht7O/2vFW2xusFUVOs= 106 + github.com/cockroachdb/pebble v1.1.2 h1:CUh2IPtR4swHlEj48Rhfzw6l/d0qA31fItcIszQVIsA= 107 + github.com/cockroachdb/pebble v1.1.2/go.mod 
h1:4exszw1r40423ZsmkG/09AFEG83I0uDgfujJdbL6kYU= 108 + github.com/cockroachdb/redact v1.1.5 h1:u1PMllDkdFfPWaNGMyLD1+so+aq3uUItthCFqzwPJ30= 109 + github.com/cockroachdb/redact v1.1.5/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= 110 + github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 h1:zuQyyAKVxetITBuuhv3BI9cMrmStnpT18zmgmTxunpo= 111 + github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06/go.mod h1:7nc4anLGjupUW/PeY5qiNYsdNXj7zopG+eqsS7To5IQ= 98 112 github.com/corpix/uarand v0.2.0 h1:U98xXwud/AVuCpkpgfPF7J5TQgr7R5tqT8VZP5KWbzE= 99 113 github.com/corpix/uarand v0.2.0/go.mod h1:/3Z1QIqWkDIhf6XWn/08/uMHoQ8JUoTIKc2iPchBOmM= 100 114 github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= ··· 126 140 github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= 127 141 github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= 128 142 github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= 143 + github.com/getsentry/sentry-go v0.27.0 h1:Pv98CIbtB3LkMWmXi4Joa5OOcwbmnX88sF5qbK3r3Ps= 144 + github.com/getsentry/sentry-go v0.27.0/go.mod h1:lc76E2QywIyW8WuBnwl8Lc4bkmQH4+w1gwTf25trprY= 145 + github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA= 146 + github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og= 129 147 github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= 130 148 github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= 131 149 github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= ··· 156 174 github.com/go-yaml/yaml v2.1.0+incompatible/go.mod h1:w2MrLa16VYP0jy6N7M5kHaCkaLENm+P+Tv+MfurjSw0= 157 175 github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= 158 176 github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= 159 - github.com/gocql/gocql v1.7.0 h1:O+7U7/1gSN7QTEAaMEsJc1Oq2QHXvCWoF3DFK9HDHus= 160 - github.com/gocql/gocql v1.7.0/go.mod h1:vnlvXyFZeLBF0Wy+RS8hrOdbn0UWsWtdg07XJnFxZ+4= 161 177 github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= 162 178 github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= 163 179 github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= ··· 195 211 github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= 196 212 github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= 197 213 github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= 198 - github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA= 199 - github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 214 + github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= 215 + github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 200 216 github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= 201 217 github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= 202 218 github.com/google/go-cmp v0.2.0/go.mod 
h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= ··· 239 255 github.com/gorilla/websocket v1.5.1/go.mod h1:x3kM2JMyaluk02fnUJpQuwD2dCS5NDG2ZHL0uE0tcaY= 240 256 github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1 h1:6UKoz5ujsI55KNpsJH3UwCq3T8kKbZwNZBNPuTTje8U= 241 257 github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1/go.mod h1:YvJ2f6MplWDhfxiUC3KpyTy76kYUZA4W3pTv/wdKQ9Y= 242 - github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed h1:5upAirOpQc1Q53c0bnx2ufif5kANL7bfZWcc6VJWJd8= 243 - github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed/go.mod h1:tMWxXQ9wFIaZeTI9F+hmhFiGpFmhOHzyShyFUhRm0H4= 244 258 github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= 245 259 github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= 246 260 github.com/hashicorp/go-hclog v0.9.2 h1:CG6TE5H9/JXsFWJCfoIVpKFIkFe6ysEuHirp4DxCsHI= ··· 512 526 github.com/orandin/slog-gorm v1.3.2/go.mod h1:MoZ51+b7xE9lwGNPYEhxcUtRNrYzjdcKvA8QXQQGEPA= 513 527 github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 h1:1/WtZae0yGtPq+TI6+Tv1WTxkukpXeMlviSxvL7SRgk= 514 528 github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9/go.mod h1:x3N5drFsm2uilKKuuYo6LdyD8vZAW55sH/9w+pbo1sw= 529 + github.com/pingcap/errors v0.11.4 h1:lFuQV/oaUMGcD2tqt+01ROSmJs75VG1ToEOkZIZ4nE4= 530 + github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= 531 + github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= 515 532 github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 516 533 github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 534 + github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 517 535 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 518 536 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 519 537 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= ··· 564 582 github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= 565 583 github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= 566 584 github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= 585 + github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= 567 586 github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= 568 587 github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= 569 588 github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= ··· 632 651 github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f/go.mod h1:p9UJB6dDgdPgMJZs7UjUOdulKyRr9fqkS+6JKAInPy8= 633 652 github.com/whyrusleeping/go-did v0.0.0-20230824162731-404d1707d5d6 h1:yJ9/LwIGIk/c0CdoavpC9RNSGSruIspSZtxG3Nnldic= 634 653 github.com/whyrusleeping/go-did v0.0.0-20230824162731-404d1707d5d6/go.mod h1:39U9RRVr4CKbXpXYopWn+FSH5s+vWu6+RmguSPWAq5s= 654 + github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ= 655 + github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0= 635 656 github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= 636 657 github.com/xrash/smetrics 
v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= 637 658 github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= ··· 795 816 golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= 796 817 golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= 797 818 golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= 798 - golang.org/x/net v0.21.0 h1:AQyQV4dYCvJ7vGmJyKki9+PBdyvhkSd8EIx/qb0AYv4= 799 - golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= 819 + golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= 820 + golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= 800 821 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= 801 822 golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= 802 823 golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= ··· 818 839 golang.org/x/sync v0.0.0-20220601150217-0de741cfad7f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 819 840 golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 820 841 golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 821 - golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= 822 - golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 842 + golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= 843 + golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 823 844 golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 824 845 golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 825 846 golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= ··· 1061 1082 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= 1062 1083 google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= 1063 1084 google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= 1064 - google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= 1065 - google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= 1085 + google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= 1086 + google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= 1066 1087 gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= 1067 1088 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 1068 1089 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= ··· 1071 1092 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 1072 1093 gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= 1073 1094 gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= 1074 - gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= 1075 - 
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= 1076 1095 gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= 1077 1096 gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= 1078 1097 gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+40 -5
indexer/crawler.go
··· 3 3 import ( 4 4 "context" 5 5 "fmt" 6 + "log/slog" 6 7 "sync" 8 + "time" 7 9 8 10 comatproto "github.com/bluesky-social/indigo/api/atproto" 9 11 "github.com/bluesky-social/indigo/models" ··· 27 29 doRepoCrawl func(context.Context, *crawlWork) error 28 30 29 31 concurrency int 32 + 33 + log *slog.Logger 34 + 35 + done chan struct{} 30 36 } 31 37 32 - func NewCrawlDispatcher(repoFn func(context.Context, *crawlWork) error, concurrency int) (*CrawlDispatcher, error) { 38 + func NewCrawlDispatcher(repoFn func(context.Context, *crawlWork) error, concurrency int, log *slog.Logger) (*CrawlDispatcher, error) { 33 39 if concurrency < 1 { 34 40 return nil, fmt.Errorf("must specify a non-zero positive integer for crawl dispatcher concurrency") 35 41 } 36 42 37 - return &CrawlDispatcher{ 43 + out := &CrawlDispatcher{ 38 44 ingest: make(chan *models.ActorInfo), 39 45 repoSync: make(chan *crawlWork), 40 46 complete: make(chan models.Uid), ··· 43 49 concurrency: concurrency, 44 50 todo: make(map[models.Uid]*crawlWork), 45 51 inProgress: make(map[models.Uid]*crawlWork), 46 - }, nil 52 + log: log, 53 + done: make(chan struct{}), 54 + } 55 + go out.CatchupRepoGaugePoller() 56 + 57 + return out, nil 47 58 } 48 59 49 60 func (c *CrawlDispatcher) Run() { ··· 52 63 for i := 0; i < c.concurrency; i++ { 53 64 go c.fetchWorker() 54 65 } 66 + } 67 + 68 + func (c *CrawlDispatcher) Shutdown() { 69 + close(c.done) 55 70 } 56 71 57 72 type catchupJob struct { ··· 173 188 } 174 189 175 190 func (c *CrawlDispatcher) addToCatchupQueue(catchup *catchupJob) *crawlWork { 176 - catchupEventsEnqueued.Inc() 177 191 c.maplk.Lock() 178 192 defer c.maplk.Unlock() 179 193 180 194 // If the actor crawl is enqueued, we can append to the catchup queue which gets emptied during the crawl 181 195 job, ok := c.todo[catchup.user.Uid] 182 196 if ok { 197 + catchupEventsEnqueued.WithLabelValues("todo").Inc() 183 198 job.catchup = append(job.catchup, catchup) 184 199 return nil 185 200 } ··· 187 202 // If the actor crawl is in progress, we can append to the nextr queue which gets emptied after the crawl 188 203 job, ok = c.inProgress[catchup.user.Uid] 189 204 if ok { 205 + catchupEventsEnqueued.WithLabelValues("prog").Inc() 190 206 job.next = append(job.next, catchup) 191 207 return nil 192 208 } 193 209 210 + catchupEventsEnqueued.WithLabelValues("new").Inc() 194 211 // Otherwise, we need to create a new crawl job for this actor and enqueue it 195 212 cw := &crawlWork{ 196 213 act: catchup.user, ··· 205 222 select { 206 223 case job := <-c.repoSync: 207 224 if err := c.doRepoCrawl(context.TODO(), job); err != nil { 208 - log.Errorf("failed to perform repo crawl of %q: %s", job.act.Did, err) 225 + c.log.Error("failed to perform repo crawl", "did", job.act.Did, "err", err) 209 226 } 210 227 211 228 // TODO: do we still just do this if it errors? ··· 269 286 270 287 return false 271 288 } 289 + 290 + func (c *CrawlDispatcher) countReposInSlowPath() int { 291 + c.maplk.Lock() 292 + defer c.maplk.Unlock() 293 + return len(c.inProgress) + len(c.todo) 294 + } 295 + 296 + func (c *CrawlDispatcher) CatchupRepoGaugePoller() { 297 + ticker := time.NewTicker(30 * time.Second) 298 + defer ticker.Stop() 299 + for { 300 + select { 301 + case <-c.done: 302 + case <-ticker.C: 303 + catchupReposGauge.Set(float64(c.countReposInSlowPath())) 304 + } 305 + } 306 + }
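The crawl dispatcher now owns a background goroutine: the constructor starts CatchupRepoGaugePoller, and the new Shutdown method closes the done channel to stop it. A minimal lifecycle sketch, written inside the indexer package because crawlWork is unexported; the function names and the concurrency value of 4 are illustrative only:

package indexer

import (
	"context"
	"log/slog"
)

// exampleCrawl is a hypothetical stand-in for the real fetch callback
// (RepoFetcher.FetchAndIndexRepo in NewIndexer).
func exampleCrawl(ctx context.Context, job *crawlWork) error { return nil }

func exampleDispatcherLifecycle() error {
	logger := slog.Default().With("system", "indexer")

	// Concurrency must be a positive integer or the constructor returns an error.
	cd, err := NewCrawlDispatcher(exampleCrawl, 4, logger)
	if err != nil {
		return err
	}

	cd.Run() // starts the main loop plus the fetch workers

	// ... feed the dispatcher crawl and catchup work here ...

	cd.Shutdown() // closes done, stopping the catchup-repos gauge poller
	return nil
}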
+38 -30
indexer/indexer.go
··· 5 5 "database/sql" 6 6 "errors" 7 7 "fmt" 8 + "log/slog" 8 9 "time" 9 10 10 11 comatproto "github.com/bluesky-social/indigo/api/atproto" ··· 19 20 "github.com/bluesky-social/indigo/xrpc" 20 21 21 22 "github.com/ipfs/go-cid" 22 - logging "github.com/ipfs/go-log" 23 23 "go.opentelemetry.io/otel" 24 24 "gorm.io/gorm" 25 25 "gorm.io/gorm/clause" 26 26 ) 27 27 28 - var log = logging.Logger("indexer") 29 - 30 28 const MaxEventSliceLength = 1000000 31 29 const MaxOpsSliceLength = 200 32 30 ··· 45 43 SendRemoteFollow func(context.Context, string, uint) error 46 44 CreateExternalUser func(context.Context, string) (*models.ActorInfo, error) 47 45 ApplyPDSClientSettings func(*xrpc.Client) 46 + 47 + log *slog.Logger 48 48 } 49 49 50 50 func NewIndexer(db *gorm.DB, notifman notifs.NotificationManager, evtman *events.EventManager, didr did.Resolver, fetcher *RepoFetcher, crawl, aggregate, spider bool) (*Indexer, error) { ··· 65 65 return nil 66 66 }, 67 67 ApplyPDSClientSettings: func(*xrpc.Client) {}, 68 + log: slog.Default().With("system", "indexer"), 68 69 } 69 70 70 71 if crawl { 71 - c, err := NewCrawlDispatcher(fetcher.FetchAndIndexRepo, fetcher.MaxConcurrency) 72 + c, err := NewCrawlDispatcher(fetcher.FetchAndIndexRepo, fetcher.MaxConcurrency, ix.log) 72 73 if err != nil { 73 74 return nil, err 74 75 } ··· 80 81 return ix, nil 81 82 } 82 83 84 + func (ix *Indexer) Shutdown() { 85 + if ix.Crawler != nil { 86 + ix.Crawler.Shutdown() 87 + } 88 + } 89 + 83 90 func (ix *Indexer) HandleRepoEvent(ctx context.Context, evt *repomgr.RepoEvent) error { 84 91 ctx, span := otel.Tracer("indexer").Start(ctx, "HandleRepoEvent") 85 92 defer span.End() 86 93 87 - log.Debugw("Handling Repo Event!", "uid", evt.User) 94 + ix.log.Debug("Handling Repo Event!", "uid", evt.User) 88 95 89 96 outops := make([]*comatproto.SyncSubscribeRepos_RepoOp, 0, len(evt.Ops)) 90 97 for _, op := range evt.Ops { ··· 96 103 }) 97 104 98 105 if err := ix.handleRepoOp(ctx, evt, &op); err != nil { 99 - log.Errorw("failed to handle repo op", "err", err) 106 + ix.log.Error("failed to handle repo op", "err", err) 100 107 } 101 108 } 102 109 ··· 113 120 toobig = true 114 121 } 115 122 116 - log.Debugw("Sending event", "did", did) 123 + ix.log.Debug("Sending event", "did", did) 117 124 if err := ix.events.AddEvent(ctx, &events.XRPCStreamEvent{ 118 125 RepoCommit: &comatproto.SyncSubscribeRepos_Commit{ 119 126 Repo: did, ··· 191 198 if e.Type == "mention" { 192 199 _, err := ix.GetUserOrMissing(ctx, e.Value) 193 200 if err != nil { 194 - log.Infow("failed to parse user mention", "ref", e.Value, "err", err) 201 + ix.log.Info("failed to parse user mention", "ref", e.Value, "err", err) 195 202 } 196 203 } 197 204 } ··· 199 206 if rec.Reply != nil { 200 207 if rec.Reply.Parent != nil { 201 208 if err := ix.crawlAtUriRef(ctx, rec.Reply.Parent.Uri); err != nil { 202 - log.Infow("failed to crawl reply parent", "cid", op.RecCid, "replyuri", rec.Reply.Parent.Uri, "err", err) 209 + ix.log.Info("failed to crawl reply parent", "cid", op.RecCid, "replyuri", rec.Reply.Parent.Uri, "err", err) 203 210 } 204 211 } 205 212 206 213 if rec.Reply.Root != nil { 207 214 if err := ix.crawlAtUriRef(ctx, rec.Reply.Root.Uri); err != nil { 208 - log.Infow("failed to crawl reply root", "cid", op.RecCid, "rooturi", rec.Reply.Root.Uri, "err", err) 215 + ix.log.Info("failed to crawl reply root", "cid", op.RecCid, "rooturi", rec.Reply.Root.Uri, "err", err) 209 216 } 210 217 } 211 218 } ··· 214 221 case *bsky.FeedRepost: 215 222 if rec.Subject != nil { 216 223 if err := 
ix.crawlAtUriRef(ctx, rec.Subject.Uri); err != nil { 217 - log.Infow("failed to crawl repost subject", "cid", op.RecCid, "subjecturi", rec.Subject.Uri, "err", err) 224 + ix.log.Info("failed to crawl repost subject", "cid", op.RecCid, "subjecturi", rec.Subject.Uri, "err", err) 218 225 } 219 226 } 220 227 return nil 221 228 case *bsky.FeedLike: 222 229 if rec.Subject != nil { 223 230 if err := ix.crawlAtUriRef(ctx, rec.Subject.Uri); err != nil { 224 - log.Infow("failed to crawl like subject", "cid", op.RecCid, "subjecturi", rec.Subject.Uri, "err", err) 231 + ix.log.Info("failed to crawl like subject", "cid", op.RecCid, "subjecturi", rec.Subject.Uri, "err", err) 225 232 } 226 233 } 227 234 return nil 228 235 case *bsky.GraphFollow: 229 236 _, err := ix.GetUserOrMissing(ctx, rec.Subject) 230 237 if err != nil { 231 - log.Infow("failed to crawl follow subject", "cid", op.RecCid, "subjectdid", rec.Subject, "err", err) 238 + ix.log.Info("failed to crawl follow subject", "cid", op.RecCid, "subjectdid", rec.Subject, "err", err) 232 239 } 233 240 return nil 234 241 case *bsky.GraphBlock: 235 242 _, err := ix.GetUserOrMissing(ctx, rec.Subject) 236 243 if err != nil { 237 - log.Infow("failed to crawl follow subject", "cid", op.RecCid, "subjectdid", rec.Subject, "err", err) 244 + ix.log.Info("failed to crawl follow subject", "cid", op.RecCid, "subjectdid", rec.Subject, "err", err) 238 245 } 239 246 return nil 240 247 case *bsky.ActorProfile: ··· 246 253 case *bsky.FeedGenerator: 247 254 return nil 248 255 default: 249 - log.Warnw("unrecognized record type (crawling references)", "record", op.Record, "collection", op.Collection) 256 + ix.log.Warn("unrecognized record type (crawling references)", "record", op.Record, "collection", op.Collection) 250 257 return nil 251 258 } 252 259 } ··· 287 294 } 288 295 289 296 func (ix *Indexer) addUserToCrawler(ctx context.Context, ai *models.ActorInfo) error { 290 - log.Debugw("Sending user to crawler: ", "did", ai.Did) 297 + ix.log.Debug("Sending user to crawler: ", "did", ai.Did) 291 298 if ix.Crawler == nil { 292 299 return nil 293 300 } ··· 389 396 } 390 397 391 398 func (ix *Indexer) handleRecordDelete(ctx context.Context, evt *repomgr.RepoEvent, op *repomgr.RepoOp, local bool) error { 392 - log.Debugw("record delete event", "collection", op.Collection) 399 + ix.log.Debug("record delete event", "collection", op.Collection) 393 400 394 401 switch op.Collection { 395 402 case "app.bsky.feed.post": ··· 405 412 fp, err := ix.GetPost(ctx, uri) 406 413 if err != nil { 407 414 if errors.Is(err, gorm.ErrRecordNotFound) { 408 - log.Warnw("deleting post weve never seen before. Weird.", "user", evt.User, "rkey", op.Rkey) 415 + ix.log.Warn("deleting post weve never seen before. 
Weird.", "user", evt.User, "rkey", op.Rkey) 409 416 return nil 410 417 } 411 418 return err ··· 419 426 return err 420 427 } 421 428 422 - log.Warn("TODO: remove notifications on delete") 429 + ix.log.Warn("TODO: remove notifications on delete") 423 430 /* 424 431 if err := ix.notifman.RemoveRepost(ctx, fp.Author, rr.ID, evt.User); err != nil { 425 432 return nil, err ··· 460 467 return err 461 468 } 462 469 463 - log.Warnf("need to delete vote notification") 470 + ix.log.Warn("need to delete vote notification") 464 471 return nil 465 472 } 466 473 ··· 471 478 } 472 479 473 480 if q.RowsAffected == 0 { 474 - log.Warnw("attempted to delete follow we did not have a record for", "user", evt.User, "rkey", op.Rkey) 481 + ix.log.Warn("attempted to delete follow we did not have a record for", "user", evt.User, "rkey", op.Rkey) 475 482 return nil 476 483 } 477 484 ··· 479 486 } 480 487 481 488 func (ix *Indexer) handleRecordCreate(ctx context.Context, evt *repomgr.RepoEvent, op *repomgr.RepoOp, local bool) ([]uint, error) { 482 - log.Debugw("record create event", "collection", op.Collection) 489 + ix.log.Debug("record create event", "collection", op.Collection) 483 490 484 491 var out []uint 485 492 switch rec := op.Record.(type) { ··· 529 536 case *bsky.FeedGenerator: 530 537 return out, nil 531 538 case *bsky.ActorProfile: 532 - log.Debugf("TODO: got actor profile record creation, need to do something with this") 539 + ix.log.Debug("TODO: got actor profile record creation, need to do something with this") 533 540 default: 541 + ix.log.Warn("unrecognized record", "record", op.Record, "collection", op.Collection) 534 542 return nil, fmt.Errorf("unrecognized record type (creation): %s", op.Collection) 535 543 } 536 544 ··· 603 611 } 604 612 605 613 func (ix *Indexer) handleRecordUpdate(ctx context.Context, evt *repomgr.RepoEvent, op *repomgr.RepoOp, local bool) error { 606 - log.Debugw("record update event", "collection", op.Collection) 614 + ix.log.Debug("record update event", "collection", op.Collection) 607 615 608 616 switch rec := op.Record.(type) { 609 617 case *bsky.FeedPost: ··· 623 631 624 632 if oldReply != newReply { 625 633 // the 'replyness' of the post was changed... 
that's weird 626 - log.Errorf("need to properly handle case where reply-ness of posts is changed") 634 + ix.log.Error("need to properly handle case where reply-ness of posts is changed") 627 635 return nil 628 636 } 629 637 ··· 634 642 } 635 643 636 644 if replyto.ID != fp.ReplyTo { 637 - log.Errorf("post was changed to be a reply to a different post") 645 + ix.log.Error("post was changed to be a reply to a different post") 638 646 return nil 639 647 } 640 648 } ··· 687 695 688 696 return ix.handleRecordCreateGraphFollow(ctx, rec, evt, op) 689 697 case *bsky.ActorProfile: 690 - log.Debugf("TODO: got actor profile record update, need to do something with this") 698 + ix.log.Debug("TODO: got actor profile record update, need to do something with this") 691 699 default: 692 700 return fmt.Errorf("unrecognized record type (update): %s", op.Collection) 693 701 } ··· 761 769 // we're likely filling in a missing reference 762 770 if !maybe.Missing { 763 771 // TODO: we've already processed this record creation 764 - log.Warnw("potentially erroneous event, duplicate create", "rkey", rkey, "user", user) 772 + ix.log.Warn("potentially erroneous event, duplicate create", "rkey", rkey, "user", user) 765 773 } 766 774 767 775 if err := ix.db.Clauses(clause.OnConflict{ ··· 785 793 } 786 794 787 795 func (ix *Indexer) createMissingPostRecord(ctx context.Context, puri *util.ParsedUri) (*models.FeedPost, error) { 788 - log.Warn("creating missing post record") 796 + ix.log.Warn("creating missing post record") 789 797 ai, err := ix.GetUserOrMissing(ctx, puri.Did) 790 798 if err != nil { 791 799 return nil, err ··· 807 815 if post.Reply != nil { 808 816 replyto, err := ix.GetPost(ctx, post.Reply.Parent.Uri) 809 817 if err != nil { 810 - log.Error("probably shouldn't error when processing a reply to a not-found post") 818 + ix.log.Error("probably shouldn't error when processing a reply to a not-found post") 811 819 return err 812 820 } 813 821
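The indexer drops the ipfs go-log dependency in favour of the standard library's log/slog, carried on the struct as ix.log; the sugared Debugw/Infow/Warnw/Errorw calls map one-to-one onto Debug/Info/Warn/Error with the same alternating key/value arguments. A standalone sketch of the pattern (the handler configuration below is illustrative, not what the service sets up):

package main

import (
	"errors"
	"log/slog"
	"os"
)

func main() {
	// A debug-level text handler so the Debug call below is actually emitted.
	logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}))

	// Tag every record the way the indexer does, then pass key/value pairs.
	log := logger.With("system", "indexer")
	log.Debug("Handling Repo Event!", "uid", 123)
	log.Error("failed to handle repo op", "err", errors.New("example failure"))
}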
+5 -1
indexer/keymgr.go
··· 3 3 import ( 4 4 "context" 5 5 "fmt" 6 + "log/slog" 6 7 7 8 did "github.com/whyrusleeping/go-did" 8 9 "go.opentelemetry.io/otel" ··· 12 13 didr DidResolver 13 14 14 15 signingKey *did.PrivKey 16 + 17 + log *slog.Logger 15 18 } 16 19 17 20 type DidResolver interface { ··· 22 25 return &KeyManager{ 23 26 didr: didr, 24 27 signingKey: k, 28 + log: slog.Default().With("system", "indexer"), 25 29 } 26 30 } 27 31 ··· 36 40 37 41 err = k.Verify(msg, sig) 38 42 if err != nil { 39 - log.Warnw("signature failed to verify", "err", err, "did", did, "pubKey", k, "sigBytes", sig, "msgBytes", msg) 43 + km.log.Warn("signature failed to verify", "err", err, "did", did, "pubKey", k, "sigBytes", sig, "msgBytes", msg) 40 44 } 41 45 return err 42 46 }
+12 -2
indexer/metrics.go
··· 25 25 Help: "Number of repos fetched", 26 26 }, []string{"status"}) 27 27 28 - var catchupEventsEnqueued = promauto.NewCounter(prometheus.CounterOpts{ 28 + var catchupEventsEnqueued = promauto.NewCounterVec(prometheus.CounterOpts{ 29 29 Name: "indexer_catchup_events_enqueued", 30 30 Help: "Number of catchup events enqueued", 31 - }) 31 + }, []string{"how"}) 32 32 33 33 var catchupEventsProcessed = promauto.NewCounter(prometheus.CounterOpts{ 34 34 Name: "indexer_catchup_events_processed", 35 35 Help: "Number of catchup events processed", 36 36 }) 37 + 38 + var catchupEventsFailed = promauto.NewCounterVec(prometheus.CounterOpts{ 39 + Name: "indexer_catchup_events_failed", 40 + Help: "Number of catchup events processed", 41 + }, []string{"err"}) 42 + 43 + var catchupReposGauge = promauto.NewGauge(prometheus.GaugeOpts{ 44 + Name: "indexer_catchup_repos", 45 + Help: "Number of repos waiting on catchup", 46 + })
+2 -1
indexer/posts_test.go
··· 50 50 t.Fatal(err) 51 51 } 52 52 53 - cs, err := carstore.NewCarStore(cardb, cspath) 53 + cs, err := carstore.NewCarStore(cardb, []string{cspath}) 54 54 if err != nil { 55 55 t.Fatal(err) 56 56 } ··· 81 81 if ix.dir != "" { 82 82 _ = os.RemoveAll(ix.dir) 83 83 } 84 + ix.ix.Shutdown() 84 85 } 85 86 86 87 // TODO: dedupe this out into some testing utility package
+11 -5
indexer/repofetch.go
··· 7 7 "fmt" 8 8 "io" 9 9 "io/fs" 10 + "log/slog" 10 11 "sync" 11 12 12 13 "github.com/bluesky-social/indigo/api/atproto" ··· 27 28 Limiters: make(map[uint]*rate.Limiter), 28 29 ApplyPDSClientSettings: func(*xrpc.Client) {}, 29 30 MaxConcurrency: maxConcurrency, 31 + log: slog.Default().With("system", "indexer"), 30 32 } 31 33 } 32 34 ··· 40 42 MaxConcurrency int 41 43 42 44 ApplyPDSClientSettings func(*xrpc.Client) 45 + 46 + log *slog.Logger 43 47 } 44 48 45 49 func (rf *RepoFetcher) GetLimiter(pdsID uint) *rate.Limiter { ··· 50 54 } 51 55 52 56 func (rf *RepoFetcher) GetOrCreateLimiter(pdsID uint, pdsrate float64) *rate.Limiter { 53 - rf.LimitMux.RLock() 54 - defer rf.LimitMux.RUnlock() 57 + rf.LimitMux.Lock() 58 + defer rf.LimitMux.Unlock() 55 59 56 60 lim, ok := rf.Limiters[pdsID] 57 61 if !ok { ··· 84 88 // Wait to prevent DOSing the PDS when connecting to a new stream with lots of active repos 85 89 limiter.Wait(ctx) 86 90 87 - log.Debugw("SyncGetRepo", "did", did, "since", rev) 91 + rf.log.Debug("SyncGetRepo", "did", did, "since", rev) 88 92 // TODO: max size on these? A malicious PDS could just send us a petabyte sized repo here and kill us 89 93 repo, err := atproto.SyncGetRepo(ctx, c, did, rev) 90 94 if err != nil { ··· 107 111 108 112 var pds models.PDS 109 113 if err := rf.db.First(&pds, "id = ?", ai.PDS).Error; err != nil { 114 + catchupEventsFailed.WithLabelValues("nopds").Inc() 110 115 return fmt.Errorf("expected to find pds record (%d) in db for crawling one of their users: %w", ai.PDS, err) 111 116 } 112 117 113 118 rev, err := rf.repoman.GetRepoRev(ctx, ai.Uid) 114 119 if err != nil && !isNotFound(err) { 120 + catchupEventsFailed.WithLabelValues("noroot").Inc() 115 121 return fmt.Errorf("failed to get repo root: %w", err) 116 122 } 117 123 ··· 123 129 for i, j := range job.catchup { 124 130 catchupEventsProcessed.Inc() 125 131 if err := rf.repoman.HandleExternalUserEvent(ctx, pds.ID, ai.Uid, ai.Did, j.evt.Since, j.evt.Rev, j.evt.Blocks, j.evt.Ops); err != nil { 126 - log.Errorw("buffered event catchup failed", "error", err, "did", ai.Did, "i", i, "jobCount", len(job.catchup), "seq", j.evt.Seq) 132 + rf.log.Error("buffered event catchup failed", "error", err, "did", ai.Did, "i", i, "jobCount", len(job.catchup), "seq", j.evt.Seq) 127 133 resync = true // fall back to a repo sync 128 134 break 129 135 } ··· 151 157 span.RecordError(err) 152 158 153 159 if ipld.IsNotFound(err) || errors.Is(err, io.EOF) || errors.Is(err, fs.ErrNotExist) { 154 - log.Errorw("partial repo fetch was missing data", "did", ai.Did, "pds", pds.Host, "rev", rev) 160 + rf.log.Error("partial repo fetch was missing data", "did", ai.Did, "pds", pds.Host, "rev", rev) 155 161 repo, err := rf.fetchRepo(ctx, c, &pds, ai.Did, "") 156 162 if err != nil { 157 163 return err
+1 -1
lex/type_schema.go
··· 223 223 } 224 224 returndef = fmt.Sprintf("(*%s.%s, error)", impname, outname) 225 225 case "application/cbor", "application/vnd.ipld.car", "*/*": 226 - returndef = fmt.Sprintf("(io.Reader, error)") 226 + returndef = "(io.Reader, error)" 227 227 default: 228 228 return fmt.Errorf("unrecognized output encoding (handler stub): %q", s.Output.Encoding) 229 229 }
+11 -11
mst/cbor_gen.go
··· 18 18 var _ = math.E 19 19 var _ = sort.Sort 20 20 21 - func (t *nodeData) MarshalCBOR(w io.Writer) error { 21 + func (t *NodeData) MarshalCBOR(w io.Writer) error { 22 22 if t == nil { 23 23 _, err := w.Write(cbg.CborNull) 24 24 return err ··· 30 30 return err 31 31 } 32 32 33 - // t.Entries ([]mst.treeEntry) (slice) 33 + // t.Entries ([]mst.TreeEntry) (slice) 34 34 if len("e") > 1000000 { 35 35 return xerrors.Errorf("Value in field \"e\" was too long") 36 36 } ··· 81 81 return nil 82 82 } 83 83 84 - func (t *nodeData) UnmarshalCBOR(r io.Reader) (err error) { 85 - *t = nodeData{} 84 + func (t *NodeData) UnmarshalCBOR(r io.Reader) (err error) { 85 + *t = NodeData{} 86 86 87 87 cr := cbg.NewCborReader(r) 88 88 ··· 101 101 } 102 102 103 103 if extra > cbg.MaxLength { 104 - return fmt.Errorf("nodeData: map struct too large (%d)", extra) 104 + return fmt.Errorf("NodeData: map struct too large (%d)", extra) 105 105 } 106 106 107 107 n := extra ··· 122 122 } 123 123 124 124 switch string(nameBuf[:nameLen]) { 125 - // t.Entries ([]mst.treeEntry) (slice) 125 + // t.Entries ([]mst.TreeEntry) (slice) 126 126 case "e": 127 127 128 128 maj, extra, err = cr.ReadHeader() ··· 139 139 } 140 140 141 141 if extra > 0 { 142 - t.Entries = make([]treeEntry, extra) 142 + t.Entries = make([]TreeEntry, extra) 143 143 } 144 144 145 145 for i := 0; i < int(extra); i++ { ··· 195 195 196 196 return nil 197 197 } 198 - func (t *treeEntry) MarshalCBOR(w io.Writer) error { 198 + func (t *TreeEntry) MarshalCBOR(w io.Writer) error { 199 199 if t == nil { 200 200 _, err := w.Write(cbg.CborNull) 201 201 return err ··· 294 294 return nil 295 295 } 296 296 297 - func (t *treeEntry) UnmarshalCBOR(r io.Reader) (err error) { 298 - *t = treeEntry{} 297 + func (t *TreeEntry) UnmarshalCBOR(r io.Reader) (err error) { 298 + *t = TreeEntry{} 299 299 300 300 cr := cbg.NewCborReader(r) 301 301 ··· 314 314 } 315 315 316 316 if extra > cbg.MaxLength { 317 - return fmt.Errorf("treeEntry: map struct too large (%d)", extra) 317 + return fmt.Errorf("TreeEntry: map struct too large (%d)", extra) 318 318 } 319 319 320 320 n := extra
+8 -8
mst/mst.go
··· 105 105 // the CBOR codec. 106 106 func CBORTypes() []reflect.Type { 107 107 return []reflect.Type{ 108 - reflect.TypeOf(nodeData{}), 109 - reflect.TypeOf(treeEntry{}), 108 + reflect.TypeOf(NodeData{}), 109 + reflect.TypeOf(TreeEntry{}), 110 110 } 111 111 } 112 112 113 113 // MST tree node as gets serialized to CBOR. Note that the CBOR fields are all 114 114 // single-character. 115 - type nodeData struct { 115 + type NodeData struct { 116 116 Left *cid.Cid `cborgen:"l"` // [nullable] pointer to lower-level subtree to the "left" of this path/key 117 - Entries []treeEntry `cborgen:"e"` // ordered list of entries at this node 117 + Entries []TreeEntry `cborgen:"e"` // ordered list of entries at this node 118 118 } 119 119 120 - // treeEntry are elements of nodeData's Entries. 121 - type treeEntry struct { 120 + // TreeEntry are elements of NodeData's Entries. 121 + type TreeEntry struct { 122 122 PrefixLen int64 `cborgen:"p"` // count of characters shared with previous path/key in tree 123 123 KeySuffix []byte `cborgen:"k"` // remaining part of path/key (appended to "previous key") 124 124 Val cid.Cid `cborgen:"v"` // CID pointer at this path/key ··· 189 189 // otherwise this is a virtual/pointer struct and we need to hydrate from 190 190 // blockstore before returning entries 191 191 if mst.pointer != cid.Undef { 192 - var nd nodeData 192 + var nd NodeData 193 193 if err := mst.cst.Get(ctx, mst.pointer, &nd); err != nil { 194 194 return nil, err 195 195 } ··· 210 210 } 211 211 212 212 // golang-specific helper that calls in to deserializeNodeData 213 - func entriesFromNodeData(ctx context.Context, nd *nodeData, cst cbor.IpldStore) ([]nodeEntry, error) { 213 + func entriesFromNodeData(ctx context.Context, nd *NodeData, cst cbor.IpldStore) ([]nodeEntry, error) { 214 214 layer := -1 215 215 if len(nd.Entries) > 0 { 216 216 // NOTE(bnewbold): can compute the layer on the first KeySuffix, because for the first entry that field is a complete key
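Exporting nodeData and treeEntry as NodeData and TreeEntry makes the MST wire format part of the package surface, so it is worth spelling out the prefix compression: each entry stores only the number of leading bytes shared with the previous key (PrefixLen) and the remaining suffix (KeySuffix). A tiny standalone illustration; the second key is invented for the example:

package main

import "fmt"

func main() {
	prev := "com.example.record/3jqfcqzm3fo2j"
	next := "com.example.record/3jqfcqzm3fs2j" // hypothetical neighbouring key

	// Shared leading bytes, the same quantity countPrefixLen computes in mst_util.go.
	n := 0
	for n < len(prev) && n < len(next) && prev[n] == next[n] {
		n++
	}
	// The second TreeEntry would carry PrefixLen = n and KeySuffix = next[n:].
	fmt.Println(n, next[n:])
}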
+2 -2
mst/mst_interop_test.go
··· 166 166 t.Fatal(err) 167 167 } 168 168 169 - simple_nd := nodeData{ 169 + simple_nd := NodeData{ 170 170 Left: nil, 171 - Entries: []treeEntry{ 171 + Entries: []TreeEntry{ 172 172 { 173 173 PrefixLen: 0, 174 174 KeySuffix: []byte("com.example.record/3jqfcqzm3fo2j"),
+4 -4
mst/mst_util.go
··· 66 66 } 67 67 68 68 // Typescript: deserializeNodeData(storage, data, layer) 69 - func deserializeNodeData(ctx context.Context, cst cbor.IpldStore, nd *nodeData, layer int) ([]nodeEntry, error) { 69 + func deserializeNodeData(ctx context.Context, cst cbor.IpldStore, nd *NodeData, layer int) ([]nodeEntry, error) { 70 70 entries := []nodeEntry{} 71 71 if nd.Left != nil { 72 72 // Note: like Typescript, this is actually a lazy load ··· 111 111 } 112 112 113 113 // Typescript: serializeNodeData(entries) -> NodeData 114 - func serializeNodeData(entries []nodeEntry) (*nodeData, error) { 115 - var data nodeData 114 + func serializeNodeData(entries []nodeEntry) (*NodeData, error) { 115 + var data NodeData 116 116 117 117 i := 0 118 118 if len(entries) > 0 && entries[0].isTree() { ··· 157 157 } 158 158 159 159 prefixLen := countPrefixLen(lastKey, leaf.Key) 160 - data.Entries = append(data.Entries, treeEntry{ 160 + data.Entries = append(data.Entries, TreeEntry{ 161 161 PrefixLen: int64(prefixLen), 162 162 KeySuffix: []byte(leaf.Key)[prefixLen:], 163 163 Val: leaf.Val,
+27
pds/data/types.go
··· 1 + package data 2 + 3 + import ( 4 + "github.com/bluesky-social/indigo/models" 5 + "gorm.io/gorm" 6 + "time" 7 + ) 8 + 9 + type User struct { 10 + ID models.Uid `gorm:"primarykey"` 11 + CreatedAt time.Time 12 + UpdatedAt time.Time 13 + DeletedAt gorm.DeletedAt `gorm:"index"` 14 + Handle string `gorm:"uniqueIndex"` 15 + Password string 16 + RecoveryKey string 17 + Email string 18 + Did string `gorm:"uniqueIndex"` 19 + PDS uint 20 + } 21 + 22 + type Peering struct { 23 + gorm.Model 24 + Host string 25 + Did string 26 + Approved bool 27 + }
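User and Peering move into the new pds/data package so other components can work with the PDS tables without importing the whole server; pds/server.go now only aliases the types. A wiring sketch with an arbitrary sqlite path:

package main

import (
	"gorm.io/driver/sqlite"
	"gorm.io/gorm"

	pdsdata "github.com/bluesky-social/indigo/pds/data"
)

func main() {
	db, err := gorm.Open(sqlite.Open("pds-example.sqlite"))
	if err != nil {
		panic(err)
	}
	// Create or update the shared tables straight from the data package.
	if err := db.AutoMigrate(&pdsdata.User{}, &pdsdata.Peering{}); err != nil {
		panic(err)
	}
}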
+6 -2
pds/feedgen.go
··· 3 3 import ( 4 4 "context" 5 5 "fmt" 6 + "log/slog" 6 7 "sort" 7 8 "strings" 8 9 "time" ··· 22 23 ix *indexer.Indexer 23 24 24 25 readRecord ReadRecordFunc 26 + 27 + log *slog.Logger 25 28 } 26 29 27 - func NewFeedGenerator(db *gorm.DB, ix *indexer.Indexer, readRecord ReadRecordFunc) (*FeedGenerator, error) { 30 + func NewFeedGenerator(db *gorm.DB, ix *indexer.Indexer, readRecord ReadRecordFunc, log *slog.Logger) (*FeedGenerator, error) { 28 31 return &FeedGenerator{ 29 32 db: db, 30 33 ix: ix, 31 34 readRecord: readRecord, 35 + log: log, 32 36 }, nil 33 37 } 34 38 ··· 355 359 356 360 func (fg *FeedGenerator) GetVotes(ctx context.Context, uri string, pcid cid.Cid, limit int, before string) ([]*HydratedVote, error) { 357 361 if before != "" { 358 - log.Warn("not respecting 'before' yet") 362 + fg.log.Warn("not respecting 'before' yet") 359 363 } 360 364 361 365 p, err := fg.ix.GetPost(ctx, uri)
+1 -1
pds/handlers_test.go
··· 29 29 t.Fatal(err) 30 30 } 31 31 32 - cs, err := carstore.NewCarStore(db, sharddir) 32 + cs, err := carstore.NewCarStore(db, []string{sharddir}) 33 33 if err != nil { 34 34 t.Fatal(err) 35 35 }
+13 -36
pds/server.go
··· 5 5 "database/sql" 6 6 "errors" 7 7 "fmt" 8 + "log/slog" 8 9 "net" 9 10 "net/http" 10 11 "net/mail" ··· 14 15 15 16 "github.com/bluesky-social/indigo/api/atproto" 16 17 comatproto "github.com/bluesky-social/indigo/api/atproto" 17 - bsky "github.com/bluesky-social/indigo/api/bsky" 18 18 "github.com/bluesky-social/indigo/carstore" 19 19 "github.com/bluesky-social/indigo/events" 20 20 "github.com/bluesky-social/indigo/indexer" 21 21 lexutil "github.com/bluesky-social/indigo/lex/util" 22 22 "github.com/bluesky-social/indigo/models" 23 23 "github.com/bluesky-social/indigo/notifs" 24 + pdsdata "github.com/bluesky-social/indigo/pds/data" 24 25 "github.com/bluesky-social/indigo/plc" 25 26 "github.com/bluesky-social/indigo/repomgr" 26 27 "github.com/bluesky-social/indigo/util" ··· 29 30 gojwt "github.com/golang-jwt/jwt" 30 31 "github.com/gorilla/websocket" 31 32 "github.com/ipfs/go-cid" 32 - logging "github.com/ipfs/go-log" 33 33 "github.com/labstack/echo/v4" 34 34 "github.com/labstack/echo/v4/middleware" 35 35 "github.com/lestrrat-go/jwx/v2/jwt" ··· 37 37 "gorm.io/gorm" 38 38 ) 39 39 40 - var log = logging.Logger("pds") 41 - 42 40 type Server struct { 43 41 db *gorm.DB 44 42 cs carstore.CarStore ··· 56 54 serviceUrl string 57 55 58 56 plc plc.PLCClient 57 + 58 + log *slog.Logger 59 59 } 60 60 61 61 // serverListenerBootTimeout is how long to wait for the requested server socket ··· 96 96 serviceUrl: serviceUrl, 97 97 jwtSigningKey: jwtkey, 98 98 enforcePeering: false, 99 + 100 + log: slog.Default().With("system", "pds"), 99 101 } 100 102 101 103 repoman.SetEventHandler(func(ctx context.Context, evt *repomgr.RepoEvent) { 102 104 if err := ix.HandleRepoEvent(ctx, evt); err != nil { 103 - log.Errorw("handle repo event failed", "user", evt.User, "err", err) 105 + s.log.Error("handle repo event failed", "user", evt.User, "err", err) 104 106 } 105 107 }, true) 106 108 107 109 //ix.SendRemoteFollow = s.sendRemoteFollow 108 110 ix.CreateExternalUser = s.createExternalUser 109 111 110 - feedgen, err := NewFeedGenerator(db, ix, s.readRecordFunc) 112 + feedgen, err := NewFeedGenerator(db, ix, s.readRecordFunc, s.log) 111 113 if err != nil { 112 114 return nil, err 113 115 } ··· 199 201 handle = hurl.Host 200 202 } 201 203 202 - profile, err := bsky.ActorGetProfile(ctx, c, did) 203 - if err != nil { 204 - return nil, err 205 - } 206 - 207 - if handle != profile.Handle { 208 - return nil, fmt.Errorf("mismatch in handle between did document and pds profile (%s != %s)", handle, profile.Handle) 209 - } 210 - 211 204 // TODO: request this users info from their server to fill out our data... 
212 205 u := User{ 213 206 Handle: handle, ··· 224 217 subj := &models.ActorInfo{ 225 218 Uid: u.ID, 226 219 Handle: sql.NullString{String: handle, Valid: true}, 227 - DisplayName: *profile.DisplayName, 220 + DisplayName: "missing display name", 228 221 Did: did, 229 222 Type: "", 230 223 PDS: peering.ID, ··· 433 426 434 427 func (s *Server) HandleHealthCheck(c echo.Context) error { 435 428 if err := s.db.Exec("SELECT 1").Error; err != nil { 436 - log.Errorf("healthcheck can't connect to database: %v", err) 429 + s.log.Error("healthcheck can't connect to database", "err", err) 437 430 return c.JSON(500, HealthStatus{Status: "error", Message: "can't connect to database"}) 438 431 } else { 439 432 return c.JSON(200, HealthStatus{Status: "ok"}) ··· 456 449 return c.String(200, u.Did) 457 450 } 458 451 459 - type User struct { 460 - ID models.Uid `gorm:"primarykey"` 461 - CreatedAt time.Time 462 - UpdatedAt time.Time 463 - DeletedAt gorm.DeletedAt `gorm:"index"` 464 - Handle string `gorm:"uniqueIndex"` 465 - Password string 466 - RecoveryKey string 467 - Email string 468 - Did string `gorm:"uniqueIndex"` 469 - PDS uint 470 - } 452 + type User = pdsdata.User 471 453 472 454 type RefreshToken struct { 473 455 gorm.Model ··· 636 618 panic("nyi") 637 619 } 638 620 639 - type Peering struct { 640 - gorm.Model 641 - Host string 642 - Did string 643 - Approved bool 644 - } 621 + type Peering = pdsdata.Peering 645 622 646 623 func (s *Server) EventsHandler(c echo.Context) error { 647 624 conn, err := websocket.Upgrade(c.Response().Writer, c.Request(), c.Response().Header(), 1<<10, 1<<10) ··· 741 718 func (s *Server) UpdateUserHandle(ctx context.Context, u *User, handle string) error { 742 719 if u.Handle == handle { 743 720 // no change? move on 744 - log.Warnw("attempted to change handle to current handle", "did", u.Did, "handle", handle) 721 + s.log.Warn("attempted to change handle to current handle", "did", u.Did, "handle", handle) 745 722 return nil 746 723 } 747 724
+90
plc/memcached.go
··· 1 + package plc 2 + 3 + import ( 4 + "context" 5 + "encoding/json" 6 + "github.com/bradfitz/gomemcache/memcache" 7 + "go.opentelemetry.io/otel/attribute" 8 + "time" 9 + 10 + "github.com/bluesky-social/indigo/did" 11 + "go.opentelemetry.io/otel" 12 + ) 13 + 14 + type MemcachedDidResolver struct { 15 + mcd *memcache.Client 16 + res did.Resolver 17 + maxAge int32 18 + } 19 + 20 + func NewMemcachedDidResolver(res did.Resolver, maxAge time.Duration, servers []string) *MemcachedDidResolver { 21 + expiry := int32(0) 22 + if maxAge.Seconds() > (30 * 24 * 60 * 60) { 23 + // clamp expiry at 30 days minus a minute for memcached 24 + expiry = (30 * 24 * 60 * 60) - 60 25 + } else { 26 + expiry = int32(maxAge.Seconds()) 27 + } 28 + client := memcache.New(servers...) 29 + return &MemcachedDidResolver{ 30 + mcd: client, 31 + res: res, 32 + maxAge: expiry, 33 + } 34 + } 35 + 36 + func (r *MemcachedDidResolver) FlushCacheFor(didstr string) { 37 + r.mcd.Delete(didstr) 38 + r.res.FlushCacheFor(didstr) 39 + } 40 + 41 + func (r *MemcachedDidResolver) tryCache(didstr string) (*did.Document, bool) { 42 + ob, err := r.mcd.Get(didstr) 43 + if (ob == nil) || (err != nil) { 44 + return nil, false 45 + } 46 + var doc did.Document 47 + err = json.Unmarshal(ob.Value, &doc) 48 + if err != nil { 49 + // TODO: log error? 50 + return nil, false 51 + } 52 + 53 + return &doc, true 54 + } 55 + 56 + func (r *MemcachedDidResolver) putCache(did string, doc *did.Document) { 57 + blob, err := json.Marshal(doc) 58 + if err != nil { 59 + // TODO: log error 60 + return 61 + } 62 + item := memcache.Item{ 63 + Key: did, 64 + Value: blob, 65 + Expiration: int32(r.maxAge), 66 + } 67 + r.mcd.Set(&item) 68 + } 69 + 70 + func (r *MemcachedDidResolver) GetDocument(ctx context.Context, didstr string) (*did.Document, error) { 71 + ctx, span := otel.Tracer("cacheResolver").Start(ctx, "getDocument") 72 + defer span.End() 73 + 74 + doc, ok := r.tryCache(didstr) 75 + if ok { 76 + span.SetAttributes(attribute.Bool("cache", true)) 77 + memcacheHitsTotal.Inc() 78 + return doc, nil 79 + } 80 + memcacheMissesTotal.Inc() 81 + span.SetAttributes(attribute.Bool("cache", false)) 82 + 83 + doc, err := r.res.GetDocument(ctx, didstr) 84 + if err != nil { 85 + return nil, err 86 + } 87 + 88 + r.putCache(didstr, doc) 89 + return doc, nil 90 + }
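MemcachedDidResolver is a drop-in did.Resolver wrapper: lookups try memcached first, misses fall through to the wrapped resolver, and the resulting document is written back with the clamped TTL. A wiring sketch; the base resolver is left abstract, and the server address and 24-hour TTL are illustrative:

package main

import (
	"context"
	"time"

	"github.com/bluesky-social/indigo/did"
	"github.com/bluesky-social/indigo/plc"
)

// resolveWithCache wraps an existing resolver with the memcached layer and
// resolves one DID through it.
func resolveWithCache(ctx context.Context, base did.Resolver, didstr string) (*did.Document, error) {
	res := plc.NewMemcachedDidResolver(base, 24*time.Hour, []string{"127.0.0.1:11211"})
	return res.GetDocument(ctx, didstr)
}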
+10
plc/metrics.go
··· 14 14 Name: "plc_cache_misses_total", 15 15 Help: "Total number of cache misses", 16 16 }) 17 + 18 + var memcacheHitsTotal = promauto.NewCounter(prometheus.CounterOpts{ 19 + Name: "plc_memcache_hits_total", 20 + Help: "Total number of cache hits", 21 + }) 22 + 23 + var memcacheMissesTotal = promauto.NewCounter(prometheus.CounterOpts{ 24 + Name: "plc_memcache_misses_total", 25 + Help: "Total number of cache misses", 26 + })
+9 -1
repomgr/bench_test.go
··· 54 54 b.Fatal(err) 55 55 } 56 56 57 - cs, err := carstore.NewCarStore(cardb, cspath) 57 + // TODO: constructor for 'either type' 58 + /* 59 + cs, err := carstore.NewCarStore(cardb, []string{cspath}) 60 + if err != nil { 61 + b.Fatal(err) 62 + } 63 + */ 64 + cs, err := carstore.NewNonArchivalCarstore(cardb) 58 65 if err != nil { 59 66 b.Fatal(err) 60 67 } 61 68 62 69 repoman := NewRepoManager(cs, &util.FakeKeyManager{}) 70 + repoman.noArchive = true 63 71 64 72 ctx := context.TODO() 65 73 if err := repoman.InitNewActor(ctx, 1, "hello.world", "did:foo:bar", "catdog", "", ""); err != nil {
+20 -12
repomgr/ingest_test.go
··· 50 50 t.Fatal(err) 51 51 } 52 52 53 - cs, err := carstore.NewCarStore(cardb, cspath) 53 + cs, err := carstore.NewCarStore(cardb, []string{cspath}) 54 54 if err != nil { 55 55 t.Fatal(err) 56 56 } ··· 69 69 } 70 70 } 71 71 72 - func testCarstore(t *testing.T, dir string) carstore.CarStore { 72 + func testCarstore(t *testing.T, dir string, archive bool) carstore.CarStore { 73 73 cardb, err := gorm.Open(sqlite.Open(filepath.Join(dir, "car.sqlite"))) 74 74 if err != nil { 75 75 t.Fatal(err) ··· 80 80 t.Fatal(err) 81 81 } 82 82 83 - cs, err := carstore.NewCarStore(cardb, cspath) 84 - if err != nil { 85 - t.Fatal(err) 86 - } 83 + if archive { 84 + cs, err := carstore.NewCarStore(cardb, []string{cspath}) 85 + if err != nil { 86 + t.Fatal(err) 87 + } 88 + return cs 89 + } else { 90 + cs, err := carstore.NewNonArchivalCarstore(cardb) 91 + if err != nil { 92 + t.Fatal(err) 93 + } 87 94 88 - return cs 95 + return cs 96 + } 89 97 } 90 98 91 99 func TestIngestWithGap(t *testing.T) { ··· 106 114 Uid: 1, 107 115 }) 108 116 109 - cs := testCarstore(t, dir) 117 + cs := testCarstore(t, dir, true) 110 118 111 119 repoman := NewRepoManager(cs, &util.FakeKeyManager{}) 112 120 ··· 114 122 if err != nil { 115 123 t.Fatal(err) 116 124 } 117 - cs2 := testCarstore(t, dir2) 125 + cs2 := testCarstore(t, dir2, true) 118 126 119 127 var since *string 120 128 ctx := context.TODO() ··· 198 206 Uid: 1, 199 207 }) 200 208 201 - cs := testCarstore(t, dir) 209 + cs := testCarstore(t, dir, true) 202 210 203 211 repoman := NewRepoManager(cs, &util.FakeKeyManager{}) 204 212 ··· 208 216 } 209 217 210 218 p1, _, err := repoman.CreateRecord(ctx, 1, "app.bsky.feed.post", &bsky.FeedPost{ 211 - Text: fmt.Sprintf("hello friend"), 219 + Text: "hello friend", 212 220 }) 213 221 if err != nil { 214 222 t.Fatal(err) 215 223 } 216 224 217 225 p2, _, err := repoman.CreateRecord(ctx, 1, "app.bsky.feed.post", &bsky.FeedPost{ 218 - Text: fmt.Sprintf("hello friend"), 226 + Text: "hello friend", 219 227 }) 220 228 if err != nil { 221 229 t.Fatal(err)
+18
repomgr/metrics.go
··· 9 9 Name: "repomgr_repo_ops_imported", 10 10 Help: "Number of repo ops imported", 11 11 }) 12 + 13 + var openAndSigCheckDuration = promauto.NewHistogram(prometheus.HistogramOpts{ 14 + Name: "repomgr_open_and_sig_check_duration", 15 + Help: "Duration of opening and signature check", 16 + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), 17 + }) 18 + 19 + var calcDiffDuration = promauto.NewHistogram(prometheus.HistogramOpts{ 20 + Name: "repomgr_calc_diff_duration", 21 + Help: "Duration of calculating diff", 22 + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), 23 + }) 24 + 25 + var writeCarSliceDuration = promauto.NewHistogram(prometheus.HistogramOpts{ 26 + Name: "repomgr_write_car_slice_duration", 27 + Help: "Duration of writing car slice", 28 + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), 29 + })
+133 -14
repomgr/repomgr.go
··· 6 6 "errors" 7 7 "fmt" 8 8 "io" 9 + "log/slog" 9 10 "strings" 10 11 "sync" 12 + "time" 11 13 12 14 atproto "github.com/bluesky-social/indigo/api/atproto" 13 15 bsky "github.com/bluesky-social/indigo/api/bsky" ··· 23 25 "github.com/ipfs/go-datastore" 24 26 blockstore "github.com/ipfs/go-ipfs-blockstore" 25 27 ipld "github.com/ipfs/go-ipld-format" 26 - logging "github.com/ipfs/go-log/v2" 27 28 "github.com/ipld/go-car" 28 29 cbg "github.com/whyrusleeping/cbor-gen" 29 30 "go.opentelemetry.io/otel" ··· 31 32 "gorm.io/gorm" 32 33 ) 33 34 34 - var log = logging.Logger("repomgr") 35 + func NewRepoManager(cs carstore.CarStore, kmgr KeyManager) *RepoManager { 35 36 36 - func NewRepoManager(cs carstore.CarStore, kmgr KeyManager) *RepoManager { 37 + var noArchive bool 38 + if _, ok := cs.(*carstore.NonArchivalCarstore); ok { 39 + noArchive = true 40 + } 37 41 38 42 return &RepoManager{ 39 43 cs: cs, 40 44 userLocks: make(map[models.Uid]*userLock), 41 45 kmgr: kmgr, 46 + log: slog.Default().With("system", "repomgr"), 47 + noArchive: noArchive, 42 48 } 43 49 } 44 50 ··· 61 67 62 68 events func(context.Context, *RepoEvent) 63 69 hydrateRecords bool 70 + 71 + log *slog.Logger 72 + noArchive bool 64 73 } 65 74 66 75 type ActorInfo struct { ··· 467 476 return cid.Undef, nil, err 468 477 } 469 478 470 - _, _, err = r.GetRecord(ctx, collection+"/"+rkey) 479 + _, _, err = r.GetRecordBytes(ctx, collection+"/"+rkey) 471 480 if err != nil { 472 481 return cid.Undef, nil, err 473 482 } ··· 528 537 } 529 538 530 539 func (rm *RepoManager) HandleExternalUserEvent(ctx context.Context, pdsid uint, uid models.Uid, did string, since *string, nrev string, carslice []byte, ops []*atproto.SyncSubscribeRepos_RepoOp) error { 540 + if rm.noArchive { 541 + return rm.handleExternalUserEventNoArchive(ctx, pdsid, uid, did, since, nrev, carslice, ops) 542 + } else { 543 + return rm.handleExternalUserEventArchive(ctx, pdsid, uid, did, since, nrev, carslice, ops) 544 + } 545 + } 546 + 547 + func (rm *RepoManager) handleExternalUserEventNoArchive(ctx context.Context, pdsid uint, uid models.Uid, did string, since *string, nrev string, carslice []byte, ops []*atproto.SyncSubscribeRepos_RepoOp) error { 531 548 ctx, span := otel.Tracer("repoman").Start(ctx, "HandleExternalUserEvent") 532 549 defer span.End() 533 550 534 551 span.SetAttributes(attribute.Int64("uid", int64(uid))) 535 552 536 - log.Debugw("HandleExternalUserEvent", "pds", pdsid, "uid", uid, "since", since, "nrev", nrev) 553 + rm.log.Debug("HandleExternalUserEvent", "pds", pdsid, "uid", uid, "since", since, "nrev", nrev) 554 + 555 + unlock := rm.lockUser(ctx, uid) 556 + defer unlock() 557 + 558 + start := time.Now() 559 + root, ds, err := rm.cs.ImportSlice(ctx, uid, since, carslice) 560 + if err != nil { 561 + return fmt.Errorf("importing external carslice: %w", err) 562 + } 563 + 564 + r, err := repo.OpenRepo(ctx, ds, root) 565 + if err != nil { 566 + return fmt.Errorf("opening external user repo (%d, root=%s): %w", uid, root, err) 567 + } 568 + 569 + if err := rm.CheckRepoSig(ctx, r, did); err != nil { 570 + return fmt.Errorf("check repo sig: %w", err) 571 + } 572 + openAndSigCheckDuration.Observe(time.Since(start).Seconds()) 573 + 574 + evtops := make([]RepoOp, 0, len(ops)) 575 + for _, op := range ops { 576 + parts := strings.SplitN(op.Path, "/", 2) 577 + if len(parts) != 2 { 578 + return fmt.Errorf("invalid rpath in mst diff, must have collection and rkey") 579 + } 580 + 581 + switch EventKind(op.Action) { 582 + case EvtKindCreateRecord: 583 + rop := RepoOp{ 584 + 
Kind: EvtKindCreateRecord, 585 + Collection: parts[0], 586 + Rkey: parts[1], 587 + RecCid: (*cid.Cid)(op.Cid), 588 + } 589 + 590 + if rm.hydrateRecords { 591 + _, rec, err := r.GetRecord(ctx, op.Path) 592 + if err != nil { 593 + return fmt.Errorf("reading changed record from car slice: %w", err) 594 + } 595 + rop.Record = rec 596 + } 597 + 598 + evtops = append(evtops, rop) 599 + case EvtKindUpdateRecord: 600 + rop := RepoOp{ 601 + Kind: EvtKindUpdateRecord, 602 + Collection: parts[0], 603 + Rkey: parts[1], 604 + RecCid: (*cid.Cid)(op.Cid), 605 + } 606 + 607 + if rm.hydrateRecords { 608 + _, rec, err := r.GetRecord(ctx, op.Path) 609 + if err != nil { 610 + return fmt.Errorf("reading changed record from car slice: %w", err) 611 + } 612 + 613 + rop.Record = rec 614 + } 615 + 616 + evtops = append(evtops, rop) 617 + case EvtKindDeleteRecord: 618 + evtops = append(evtops, RepoOp{ 619 + Kind: EvtKindDeleteRecord, 620 + Collection: parts[0], 621 + Rkey: parts[1], 622 + }) 623 + default: 624 + return fmt.Errorf("unrecognized external user event kind: %q", op.Action) 625 + } 626 + } 627 + 628 + if rm.events != nil { 629 + rm.events(ctx, &RepoEvent{ 630 + User: uid, 631 + //OldRoot: prev, 632 + NewRoot: root, 633 + Rev: nrev, 634 + Since: since, 635 + Ops: evtops, 636 + RepoSlice: carslice, 637 + PDS: pdsid, 638 + }) 639 + } 640 + 641 + return nil 642 + } 643 + 644 + func (rm *RepoManager) handleExternalUserEventArchive(ctx context.Context, pdsid uint, uid models.Uid, did string, since *string, nrev string, carslice []byte, ops []*atproto.SyncSubscribeRepos_RepoOp) error { 645 + ctx, span := otel.Tracer("repoman").Start(ctx, "HandleExternalUserEvent") 646 + defer span.End() 647 + 648 + span.SetAttributes(attribute.Int64("uid", int64(uid))) 649 + 650 + rm.log.Debug("HandleExternalUserEvent", "pds", pdsid, "uid", uid, "since", since, "nrev", nrev) 537 651 538 652 unlock := rm.lockUser(ctx, uid) 539 653 defer unlock() 540 654 655 + start := time.Now() 541 656 root, ds, err := rm.cs.ImportSlice(ctx, uid, since, carslice) 542 657 if err != nil { 543 658 return fmt.Errorf("importing external carslice: %w", err) ··· 551 666 if err := rm.CheckRepoSig(ctx, r, did); err != nil { 552 667 return err 553 668 } 669 + openAndSigCheckDuration.Observe(time.Since(start).Seconds()) 554 670 555 671 var skipcids map[cid.Cid]bool 556 672 if ds.BaseCid().Defined() { ··· 571 687 } 572 688 } 573 689 690 + start = time.Now() 574 691 if err := ds.CalcDiff(ctx, skipcids); err != nil { 575 692 return fmt.Errorf("failed while calculating mst diff (since=%v): %w", since, err) 576 - 577 693 } 694 + calcDiffDuration.Observe(time.Since(start).Seconds()) 578 695 579 696 evtops := make([]RepoOp, 0, len(ops)) 580 697 ··· 631 748 } 632 749 } 633 750 751 + start = time.Now() 634 752 rslice, err := ds.CloseWithRoot(ctx, root, nrev) 635 753 if err != nil { 636 754 return fmt.Errorf("close with root: %w", err) 637 755 } 756 + writeCarSliceDuration.Observe(time.Since(start).Seconds()) 638 757 639 758 if rm.events != nil { 640 759 rm.events(ctx, &RepoEvent{ ··· 829 948 ops := make([]RepoOp, 0, len(diffops)) 830 949 for _, op := range diffops { 831 950 repoOpsImported.Inc() 832 - out, err := processOp(ctx, bs, op, rm.hydrateRecords) 951 + out, err := rm.processOp(ctx, bs, op, rm.hydrateRecords) 833 952 if err != nil { 834 - log.Errorw("failed to process repo op", "err", err, "path", op.Rpath, "repo", repoDid) 953 + rm.log.Error("failed to process repo op", "err", err, "path", op.Rpath, "repo", repoDid) 835 954 } 836 955 837 956 if out != nil 
{ ··· 865 984 return nil 866 985 } 867 986 868 - func processOp(ctx context.Context, bs blockstore.Blockstore, op *mst.DiffOp, hydrateRecords bool) (*RepoOp, error) { 987 + func (rm *RepoManager) processOp(ctx context.Context, bs blockstore.Blockstore, op *mst.DiffOp, hydrateRecords bool) (*RepoOp, error) { 869 988 parts := strings.SplitN(op.Rpath, "/", 2) 870 989 if len(parts) != 2 { 871 990 return nil, fmt.Errorf("repo mst had invalid rpath: %q", op.Rpath) ··· 898 1017 return nil, err 899 1018 } 900 1019 901 - log.Warnf("failed processing repo diff: %s", err) 1020 + rm.log.Warn("failed processing repo diff", "err", err) 902 1021 } else { 903 1022 outop.Record = rec 904 1023 } ··· 954 1073 // the repos lifecycle, this will end up erroneously not including 955 1074 // them. We should compute the set of blocks needed to read any repo 956 1075 // ops that happened in the commit and use that for our 'output' blocks 957 - cids, err := walkTree(ctx, seen, root, membs, true) 1076 + cids, err := rm.walkTree(ctx, seen, root, membs, true) 958 1077 if err != nil { 959 1078 return fmt.Errorf("walkTree: %w", err) 960 1079 } ··· 995 1114 996 1115 // walkTree returns all cids linked recursively by the root, skipping any cids 997 1116 // in the 'skip' map, and not erroring on 'not found' if prevMissing is set 998 - func walkTree(ctx context.Context, skip map[cid.Cid]bool, root cid.Cid, bs blockstore.Blockstore, prevMissing bool) ([]cid.Cid, error) { 1117 + func (rm *RepoManager) walkTree(ctx context.Context, skip map[cid.Cid]bool, root cid.Cid, bs blockstore.Blockstore, prevMissing bool) ([]cid.Cid, error) { 999 1118 // TODO: what if someone puts non-cbor links in their repo? 1000 1119 if root.Prefix().Codec != cid.DagCBOR { 1001 1120 return nil, fmt.Errorf("can only handle dag-cbor objects in repos (%s is %d)", root, root.Prefix().Codec) ··· 1009 1128 var links []cid.Cid 1010 1129 if err := cbg.ScanForLinks(bytes.NewReader(blk.RawData()), func(c cid.Cid) { 1011 1130 if c.Prefix().Codec == cid.Raw { 1012 - log.Debugw("skipping 'raw' CID in record", "recordCid", root, "rawCid", c) 1131 + rm.log.Debug("skipping 'raw' CID in record", "recordCid", root, "rawCid", c) 1013 1132 return 1014 1133 } 1015 1134 if skip[c] { ··· 1029 1148 1030 1149 // TODO: should do this non-recursive since i expect these may get deep 1031 1150 for _, c := range links { 1032 - sub, err := walkTree(ctx, skip, c, bs, prevMissing) 1151 + sub, err := rm.walkTree(ctx, skip, c, bs, prevMissing) 1033 1152 if err != nil { 1034 1153 if prevMissing && !ipld.IsNotFound(err) { 1035 1154 return nil, err
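Which of the two HandleExternalUserEvent paths runs is fixed at construction time: NewRepoManager type-asserts the carstore and sets noArchive, so callers choose behaviour simply by picking a carstore constructor, as the testCarstore helper above does. A construction sketch with an illustrative shard directory:

package main

import (
	"github.com/bluesky-social/indigo/carstore"
	"github.com/bluesky-social/indigo/repomgr"
	"gorm.io/gorm"
)

func newRepoManager(db *gorm.DB, kmgr repomgr.KeyManager, archive bool) (*repomgr.RepoManager, error) {
	var cs carstore.CarStore
	var err error
	if archive {
		// Archival: CAR shards on disk, full slice history.
		cs, err = carstore.NewCarStore(db, []string{"/data/carstore"})
	} else {
		// Non-archival: the repo manager detects this type and takes the
		// lighter path that skips the MST diff bookkeeping.
		cs, err = carstore.NewNonArchivalCarstore(db)
	}
	if err != nil {
		return nil, err
	}
	return repomgr.NewRepoManager(cs, kmgr), nil
}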
+1
search/firehose.go
··· 139 139 idx.relayhost, 140 140 rsc.EventHandler, 141 141 ), 142 + idx.logger, 142 143 ) 143 144 } 144 145
+1 -1
search/handlers.go
··· 39 39 if offset > 10000 { 40 40 return 0, 0, &echo.HTTPError{ 41 41 Code: 400, 42 - Message: fmt.Sprintf("invalid value for 'cursor' (can't paginate so deep)"), 42 + Message: "invalid value for 'cursor' (can't paginate so deep)", 43 43 } 44 44 } 45 45
+3 -1
search/indexing.go
··· 130 130 opts.SyncRequestsPerSecond = 8 131 131 } 132 132 133 - opts.CheckoutPath = fmt.Sprintf("%s/xrpc/com.atproto.sync.getRepo", relayHTTP) 133 + opts.RelayHost = relayHTTP 134 134 if config.IndexMaxConcurrency > 0 { 135 135 opts.ParallelRecordCreates = config.IndexMaxConcurrency 136 136 } else { ··· 145 145 idx.handleDelete, 146 146 opts, 147 147 ) 148 + // reuse identity directory (for efficient caching) 149 + bf.Directory = dir 148 150 149 151 idx.bfs = bfstore 150 152 idx.bf = bf
+1 -3
search/transform.go
··· 267 267 } 268 268 } 269 269 } 270 - for _, t := range p.Tags { 271 - ret = append(ret, t) 272 - } 270 + ret = append(ret, p.Tags...) 273 271 if len(ret) == 0 { 274 272 return nil 275 273 }
+16
splitter/metrics.go
··· 1 + package splitter 2 + 3 + import ( 4 + "github.com/prometheus/client_golang/prometheus" 5 + "github.com/prometheus/client_golang/prometheus/promauto" 6 + ) 7 + 8 + var eventsSentCounter = promauto.NewCounterVec(prometheus.CounterOpts{ 9 + Name: "spl_events_sent_counter", 10 + Help: "The total number of events sent to consumers", 11 + }, []string{"remote_addr", "user_agent"}) 12 + 13 + var activeClientGauge = promauto.NewGauge(prometheus.GaugeOpts{ 14 + Name: "spl_active_clients", 15 + Help: "Current number of active clients", 16 + })
+144
splitter/ringbuf.go
··· 1 + package splitter 2 + 3 + import ( 4 + "context" 5 + "sync" 6 + 7 + events "github.com/bluesky-social/indigo/events" 8 + "github.com/bluesky-social/indigo/models" 9 + ) 10 + 11 + func NewEventRingBuffer(chunkSize, nchunks int) *EventRingBuffer { 12 + return &EventRingBuffer{ 13 + chunkSize: chunkSize, 14 + maxChunkCount: nchunks, 15 + } 16 + } 17 + 18 + type EventRingBuffer struct { 19 + lk sync.Mutex 20 + chunks []*ringChunk 21 + chunkSize int 22 + maxChunkCount int 23 + 24 + broadcast func(*events.XRPCStreamEvent) 25 + } 26 + 27 + type ringChunk struct { 28 + lk sync.Mutex 29 + buf []*events.XRPCStreamEvent 30 + } 31 + 32 + func (rc *ringChunk) append(evt *events.XRPCStreamEvent) { 33 + rc.lk.Lock() 34 + defer rc.lk.Unlock() 35 + rc.buf = append(rc.buf, evt) 36 + } 37 + 38 + func (rc *ringChunk) events() []*events.XRPCStreamEvent { 39 + rc.lk.Lock() 40 + defer rc.lk.Unlock() 41 + return rc.buf 42 + } 43 + 44 + func (er *EventRingBuffer) Persist(ctx context.Context, evt *events.XRPCStreamEvent) error { 45 + er.lk.Lock() 46 + defer er.lk.Unlock() 47 + 48 + if len(er.chunks) == 0 { 49 + er.chunks = []*ringChunk{new(ringChunk)} 50 + } 51 + 52 + last := er.chunks[len(er.chunks)-1] 53 + if len(last.buf) >= er.chunkSize { 54 + last = new(ringChunk) 55 + er.chunks = append(er.chunks, last) 56 + if len(er.chunks) > er.maxChunkCount { 57 + er.chunks = er.chunks[1:] 58 + } 59 + } 60 + 61 + last.append(evt) 62 + 63 + er.broadcast(evt) 64 + return nil 65 + } 66 + 67 + func (er *EventRingBuffer) Flush(context.Context) error { 68 + return nil 69 + } 70 + 71 + func (er *EventRingBuffer) Playback(ctx context.Context, since int64, cb func(*events.XRPCStreamEvent) error) error { 72 + // run playback a few times to get as close to 'live' as possible before returning 73 + for i := 0; i < 10; i++ { 74 + n, err := er.playbackRound(ctx, since, cb) 75 + if err != nil { 76 + return err 77 + } 78 + 79 + // playback had no new events 80 + if n-since == 0 { 81 + return nil 82 + } 83 + since = n 84 + } 85 + 86 + return nil 87 + } 88 + 89 + func (er *EventRingBuffer) playbackRound(ctx context.Context, since int64, cb func(*events.XRPCStreamEvent) error) (int64, error) { 90 + // grab a snapshot of the current chunks 91 + er.lk.Lock() 92 + chunks := er.chunks 93 + er.lk.Unlock() 94 + 95 + i := len(chunks) - 1 96 + for ; i >= 0; i-- { 97 + c := chunks[i] 98 + evts := c.events() 99 + if since > events.SequenceForEvent(evts[len(evts)-1]) { 100 + i++ 101 + break 102 + } 103 + } 104 + if i < 0 { 105 + i = 0 106 + } 107 + 108 + var lastSeq int64 = since 109 + for _, c := range chunks[i:] { 110 + var nread int 111 + evts := c.events() 112 + for nread < len(evts) { 113 + for _, e := range evts[nread:] { 114 + nread++ 115 + seq := events.SequenceForEvent(e) 116 + if seq <= since { 117 + continue 118 + } 119 + 120 + if err := cb(e); err != nil { 121 + return 0, err 122 + } 123 + lastSeq = seq 124 + } 125 + 126 + // recheck evts buffer to see if more were added while we were here 127 + evts = c.events() 128 + } 129 + } 130 + 131 + return lastSeq, nil 132 + } 133 + 134 + func (er *EventRingBuffer) SetEventBroadcaster(brc func(*events.XRPCStreamEvent)) { 135 + er.broadcast = brc 136 + } 137 + 138 + func (er *EventRingBuffer) Shutdown(context.Context) error { 139 + return nil 140 + } 141 + 142 + func (er *EventRingBuffer) TakeDownRepo(context.Context, models.Uid) error { 143 + return nil 144 + }
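The ring buffer is the splitter's in-memory persistence: Persist appends events into fixed-size chunks, discarding the oldest chunk once maxChunkCount is exceeded, and broadcasts each event immediately, while Playback replays everything after a consumer's cursor and loops up to ten times to get close to live before returning. A consumer-side sketch; the forwarding callback is left as a placeholder:

package main

import (
	"context"

	events "github.com/bluesky-social/indigo/events"
	"github.com/bluesky-social/indigo/splitter"
)

// replayFrom streams buffered events with sequence numbers greater than
// cursor, oldest first.
func replayFrom(ctx context.Context, erb *splitter.EventRingBuffer, cursor int64) error {
	return erb.Playback(ctx, cursor, func(evt *events.XRPCStreamEvent) error {
		// forward evt to the connected consumer here
		_ = evt
		return nil
	})
}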
+675
splitter/splitter.go
··· 1 + package splitter 2 + 3 + import ( 4 + "bytes" 5 + "context" 6 + "encoding/json" 7 + "errors" 8 + "fmt" 9 + "go.opentelemetry.io/otel" 10 + "io" 11 + "log/slog" 12 + "math/rand" 13 + "net" 14 + "net/http" 15 + "net/url" 16 + "os" 17 + "strconv" 18 + "strings" 19 + "sync" 20 + "time" 21 + 22 + "github.com/bluesky-social/indigo/api/atproto" 23 + comatproto "github.com/bluesky-social/indigo/api/atproto" 24 + "github.com/bluesky-social/indigo/bgs" 25 + events "github.com/bluesky-social/indigo/events" 26 + "github.com/bluesky-social/indigo/events/schedulers/sequential" 27 + "github.com/bluesky-social/indigo/util" 28 + "github.com/bluesky-social/indigo/xrpc" 29 + "github.com/gorilla/websocket" 30 + "github.com/labstack/echo/v4" 31 + "github.com/labstack/echo/v4/middleware" 32 + promclient "github.com/prometheus/client_golang/prometheus" 33 + "github.com/prometheus/client_golang/prometheus/promhttp" 34 + dto "github.com/prometheus/client_model/go" 35 + ) 36 + 37 + type Splitter struct { 38 + erb *EventRingBuffer 39 + pp *events.PebblePersist 40 + events *events.EventManager 41 + 42 + // Management of Socket Consumers 43 + consumersLk sync.RWMutex 44 + nextConsumerID uint64 45 + consumers map[uint64]*SocketConsumer 46 + 47 + conf SplitterConfig 48 + 49 + log *slog.Logger 50 + 51 + httpC *http.Client 52 + nextCrawlers []*url.URL 53 + } 54 + 55 + type SplitterConfig struct { 56 + UpstreamHost string 57 + CursorFile string 58 + PebbleOptions *events.PebblePersistOptions 59 + } 60 + 61 + func (sc *SplitterConfig) XrpcRootUrl() string { 62 + if strings.HasPrefix(sc.UpstreamHost, "http://") { 63 + return sc.UpstreamHost 64 + } 65 + if strings.HasPrefix(sc.UpstreamHost, "https://") { 66 + return sc.UpstreamHost 67 + } 68 + if strings.HasPrefix(sc.UpstreamHost, "ws://") { 69 + return "http://" + sc.UpstreamHost[5:] 70 + } 71 + if strings.HasPrefix(sc.UpstreamHost, "wss://") { 72 + return "https://" + sc.UpstreamHost[6:] 73 + } 74 + return "https://" + sc.UpstreamHost 75 + } 76 + 77 + func NewSplitter(conf SplitterConfig, nextCrawlers []string) (*Splitter, error) { 78 + var nextCrawlerURLs []*url.URL 79 + log := slog.Default().With("system", "splitter") 80 + if len(nextCrawlers) > 0 { 81 + nextCrawlerURLs = make([]*url.URL, len(nextCrawlers)) 82 + for i, tu := range nextCrawlers { 83 + var err error 84 + nextCrawlerURLs[i], err = url.Parse(tu) 85 + if err != nil { 86 + return nil, fmt.Errorf("failed to parse next-crawler url: %w", err) 87 + } 88 + log.Info("configuring relay for requestCrawl", "host", nextCrawlerURLs[i]) 89 + } 90 + } 91 + 92 + s := &Splitter{ 93 + conf: conf, 94 + consumers: make(map[uint64]*SocketConsumer), 95 + log: log, 96 + httpC: util.RobustHTTPClient(), 97 + nextCrawlers: nextCrawlerURLs, 98 + } 99 + 100 + if conf.PebbleOptions == nil { 101 + // mem splitter 102 + erb := NewEventRingBuffer(20_000, 10_000) 103 + s.erb = erb 104 + s.events = events.NewEventManager(erb) 105 + } else { 106 + pp, err := events.NewPebblePersistance(conf.PebbleOptions) 107 + if err != nil { 108 + return nil, err 109 + } 110 + go pp.GCThread(context.Background()) 111 + s.pp = pp 112 + s.events = events.NewEventManager(pp) 113 + } 114 + 115 + return s, nil 116 + } 117 + func NewDiskSplitter(host, path string, persistHours float64, maxBytes int64) (*Splitter, error) { 118 + ppopts := events.PebblePersistOptions{ 119 + DbPath: path, 120 + PersistDuration: time.Duration(float64(time.Hour) * persistHours), 121 + GCPeriod: 5 * time.Minute, 122 + MaxBytes: uint64(maxBytes), 123 + } 124 + conf := 
SplitterConfig{ 125 + UpstreamHost: host, 126 + CursorFile: "cursor-file", 127 + PebbleOptions: &ppopts, 128 + } 129 + pp, err := events.NewPebblePersistance(&ppopts) 130 + if err != nil { 131 + return nil, err 132 + } 133 + 134 + go pp.GCThread(context.Background()) 135 + em := events.NewEventManager(pp) 136 + return &Splitter{ 137 + conf: conf, 138 + pp: pp, 139 + events: em, 140 + consumers: make(map[uint64]*SocketConsumer), 141 + log: slog.Default().With("system", "splitter"), 142 + }, nil 143 + } 144 + 145 + func (s *Splitter) Start(addr string) error { 146 + var lc net.ListenConfig 147 + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) 148 + defer cancel() 149 + 150 + curs, err := s.getLastCursor() 151 + if err != nil { 152 + return fmt.Errorf("loading cursor failed: %w", err) 153 + } 154 + 155 + go s.subscribeWithRedialer(context.Background(), s.conf.UpstreamHost, curs) 156 + 157 + li, err := lc.Listen(ctx, "tcp", addr) 158 + if err != nil { 159 + return err 160 + } 161 + return s.StartWithListener(li) 162 + } 163 + 164 + func (s *Splitter) StartMetrics(listen string) error { 165 + http.Handle("/metrics", promhttp.Handler()) 166 + return http.ListenAndServe(listen, nil) 167 + } 168 + 169 + func (s *Splitter) Shutdown() error { 170 + return nil 171 + } 172 + 173 + func (s *Splitter) StartWithListener(listen net.Listener) error { 174 + e := echo.New() 175 + e.HideBanner = true 176 + 177 + e.Use(middleware.CORSWithConfig(middleware.CORSConfig{ 178 + AllowOrigins: []string{"*"}, 179 + AllowHeaders: []string{echo.HeaderOrigin, echo.HeaderContentType, echo.HeaderAccept, echo.HeaderAuthorization}, 180 + })) 181 + 182 + /* 183 + if !s.ssl { 184 + e.Use(middleware.LoggerWithConfig(middleware.LoggerConfig{ 185 + Format: "method=${method}, uri=${uri}, status=${status} latency=${latency_human}\n", 186 + })) 187 + } else { 188 + e.Use(middleware.LoggerWithConfig(middleware.DefaultLoggerConfig)) 189 + } 190 + */ 191 + 192 + e.Use(bgs.MetricsMiddleware) 193 + 194 + e.HTTPErrorHandler = func(err error, ctx echo.Context) { 195 + switch err := err.(type) { 196 + case *echo.HTTPError: 197 + if err2 := ctx.JSON(err.Code, map[string]any{ 198 + "error": err.Message, 199 + }); err2 != nil { 200 + s.log.Error("Failed to write http error", "err", err2) 201 + } 202 + default: 203 + sendHeader := true 204 + if ctx.Path() == "/xrpc/com.atproto.sync.subscribeRepos" { 205 + sendHeader = false 206 + } 207 + 208 + s.log.Warn("HANDLER ERROR", "path", ctx.Path(), "err", err) 209 + 210 + if strings.HasPrefix(ctx.Path(), "/admin/") { 211 + ctx.JSON(500, map[string]any{ 212 + "error": err.Error(), 213 + }) 214 + return 215 + } 216 + 217 + if sendHeader { 218 + ctx.Response().WriteHeader(500) 219 + } 220 + } 221 + } 222 + 223 + // TODO: this API is temporary until we formalize what we want here 224 + 225 + e.POST("/xrpc/com.atproto.sync.requestCrawl", s.RequestCrawlHandler) 226 + e.GET("/xrpc/com.atproto.sync.subscribeRepos", s.EventsHandler) 227 + e.GET("/xrpc/com.atproto.sync.listRepos", s.HandleComAtprotoSyncListRepos) 228 + 229 + e.GET("/xrpc/_health", s.HandleHealthCheck) 230 + e.GET("/_health", s.HandleHealthCheck) 231 + e.GET("/", s.HandleHomeMessage) 232 + 233 + // In order to support booting on random ports in tests, we need to tell the 234 + // Echo instance it's already got a port, and then use its StartServer 235 + // method to re-use that listener. 
236 + e.Listener = listen 237 + srv := &http.Server{} 238 + return e.StartServer(srv) 239 + } 240 + 241 + type HealthStatus struct { 242 + Status string `json:"status"` 243 + Message string `json:"msg,omitempty"` 244 + } 245 + 246 + func (s *Splitter) HandleHealthCheck(c echo.Context) error { 247 + return c.JSON(200, HealthStatus{Status: "ok"}) 248 + } 249 + 250 + var homeMessage string = ` 251 + _ _ 252 + _ _ __ _(_)_ _ | |__ _____ __ __ 253 + | '_/ _' | | ' \| '_ \/ _ \ V V / 254 + |_| \__,_|_|_||_|_.__/\___/\_/\_/ 255 + 256 + This is an atproto [https://atproto.com] firehose fanout service, running the 'rainbow' codebase [https://github.com/bluesky-social/indigo] 257 + 258 + The firehose WebSocket path is at: /xrpc/com.atproto.sync.subscribeRepos 259 + ` 260 + 261 + func (s *Splitter) HandleHomeMessage(c echo.Context) error { 262 + return c.String(http.StatusOK, homeMessage) 263 + } 264 + 265 + type XRPCError struct { 266 + Message string `json:"message"` 267 + } 268 + 269 + func (s *Splitter) RequestCrawlHandler(c echo.Context) error { 270 + ctx := c.Request().Context() 271 + var body comatproto.SyncRequestCrawl_Input 272 + if err := c.Bind(&body); err != nil { 273 + return c.JSON(http.StatusBadRequest, XRPCError{Message: fmt.Sprintf("invalid body: %s", err)}) 274 + } 275 + 276 + host := body.Hostname 277 + if host == "" { 278 + return echo.NewHTTPError(http.StatusBadRequest, "must pass hostname") 279 + } 280 + 281 + if !strings.HasPrefix(host, "http://") && !strings.HasPrefix(host, "https://") { 282 + host = "https://" + host 283 + } 284 + 285 + u, err := url.Parse(host) 286 + if err != nil { 287 + return echo.NewHTTPError(http.StatusBadRequest, "failed to parse hostname") 288 + } 289 + 290 + if u.Scheme == "http" { 291 + return echo.NewHTTPError(http.StatusBadRequest, "this server requires https") 292 + } 293 + if u.Path != "" { 294 + return echo.NewHTTPError(http.StatusBadRequest, "must pass hostname without path") 295 + } 296 + 297 + if u.Query().Encode() != "" { 298 + return echo.NewHTTPError(http.StatusBadRequest, "must pass hostname without query") 299 + } 300 + 301 + host = u.Host // potentially hostname:port 302 + 303 + clientHost := fmt.Sprintf("%s://%s", u.Scheme, host) 304 + 305 + xrpcC := &xrpc.Client{ 306 + Host: clientHost, 307 + Client: http.DefaultClient, // not using the client that auto-retries 308 + } 309 + 310 + desc, err := atproto.ServerDescribeServer(ctx, xrpcC) 311 + if err != nil { 312 + errMsg := fmt.Sprintf("requested host (%s) failed to respond to describe request", clientHost) 313 + return echo.NewHTTPError(http.StatusBadRequest, errMsg) 314 + } 315 + 316 + // Maybe we could do something with this response later 317 + _ = desc 318 + 319 + if len(s.nextCrawlers) != 0 { 320 + blob, err := json.Marshal(body) 321 + if err != nil { 322 + s.log.Warn("could not forward requestCrawl, json err", "err", err) 323 + } else { 324 + go func(bodyBlob []byte) { 325 + for _, remote := range s.nextCrawlers { 326 + if remote == nil { 327 + continue 328 + } 329 + 330 + pu := remote.JoinPath("/xrpc/com.atproto.sync.requestCrawl") 331 + response, err := s.httpC.Post(pu.String(), "application/json", bytes.NewReader(bodyBlob)) 332 + if response != nil && response.Body != nil { 333 + response.Body.Close() 334 + } 335 + if err != nil || response == nil { 336 + s.log.Warn("requestCrawl forward failed", "host", remote, "err", err) 337 + } else if response.StatusCode != http.StatusOK { 338 + s.log.Warn("requestCrawl forward failed", "host", remote, "status", response.Status) 339 + } 
else { 340 + s.log.Info("requestCrawl forward successful", "host", remote) 341 + } 342 + } 343 + }(blob) 344 + } 345 + } 346 + 347 + return c.JSON(200, HealthStatus{Status: "ok"}) 348 + } 349 + 350 + func (s *Splitter) HandleComAtprotoSyncListRepos(c echo.Context) error { 351 + ctx, span := otel.Tracer("server").Start(c.Request().Context(), "HandleComAtprotoSyncListRepos") 352 + defer span.End() 353 + 354 + cursorQuery := c.QueryParam("cursor") 355 + limitQuery := c.QueryParam("limit") 356 + 357 + var err error 358 + 359 + limit := int64(500) 360 + if limitQuery != "" { 361 + limit, err = strconv.ParseInt(limitQuery, 10, 64) 362 + if err != nil || limit < 1 || limit > 1000 { 363 + return c.JSON(http.StatusBadRequest, XRPCError{Message: fmt.Sprintf("invalid limit: %s", limitQuery)}) 364 + } 365 + } 366 + 367 + client := xrpc.Client{ 368 + Client: s.httpC, 369 + Host: s.conf.XrpcRootUrl(), 370 + } 371 + 372 + out, handleErr := atproto.SyncListRepos(ctx, &client, cursorQuery, limit) 373 + if handleErr != nil { 374 + return handleErr 375 + } 376 + return c.JSON(200, out) 377 + } 378 + 379 + func (s *Splitter) EventsHandler(c echo.Context) error { 380 + var since *int64 381 + if sinceVal := c.QueryParam("cursor"); sinceVal != "" { 382 + sval, err := strconv.ParseInt(sinceVal, 10, 64) 383 + if err != nil { 384 + return err 385 + } 386 + since = &sval 387 + } 388 + 389 + ctx, cancel := context.WithCancel(c.Request().Context()) 390 + defer cancel() 391 + 392 + // TODO: authhhh 393 + conn, err := websocket.Upgrade(c.Response(), c.Request(), c.Response().Header(), 10<<10, 10<<10) 394 + if err != nil { 395 + return fmt.Errorf("upgrading websocket: %w", err) 396 + } 397 + 398 + lastWriteLk := sync.Mutex{} 399 + lastWrite := time.Now() 400 + 401 + // Start a goroutine to ping the client every 30 seconds to check if it's 402 + // still alive. If the client doesn't respond to a ping within 5 seconds, 403 + // we'll close the connection and teardown the consumer. 404 + go func() { 405 + ticker := time.NewTicker(30 * time.Second) 406 + defer ticker.Stop() 407 + 408 + for { 409 + select { 410 + case <-ticker.C: 411 + lastWriteLk.Lock() 412 + lw := lastWrite 413 + lastWriteLk.Unlock() 414 + 415 + if time.Since(lw) < 30*time.Second { 416 + continue 417 + } 418 + 419 + if err := conn.WriteControl(websocket.PingMessage, []byte{}, time.Now().Add(5*time.Second)); err != nil { 420 + s.log.Error("failed to ping client", "err", err) 421 + cancel() 422 + return 423 + } 424 + case <-ctx.Done(): 425 + return 426 + } 427 + } 428 + }() 429 + 430 + conn.SetPingHandler(func(message string) error { 431 + err := conn.WriteControl(websocket.PongMessage, []byte(message), time.Now().Add(time.Second*60)) 432 + if err == websocket.ErrCloseSent { 433 + return nil 434 + } else if e, ok := err.(net.Error); ok && e.Temporary() { 435 + return nil 436 + } 437 + return err 438 + }) 439 + 440 + // Start a goroutine to read messages from the client and discard them. 
441 + go func() { 442 + for { 443 + _, _, err := conn.ReadMessage() 444 + if err != nil { 445 + s.log.Error("failed to read message from client", "err", err) 446 + cancel() 447 + return 448 + } 449 + } 450 + }() 451 + 452 + ident := c.RealIP() + "-" + c.Request().UserAgent() 453 + 454 + evts, cleanup, err := s.events.Subscribe(ctx, ident, func(evt *events.XRPCStreamEvent) bool { return true }, since) 455 + if err != nil { 456 + return err 457 + } 458 + defer cleanup() 459 + 460 + // Keep track of the consumer for metrics and admin endpoints 461 + consumer := SocketConsumer{ 462 + RemoteAddr: c.RealIP(), 463 + UserAgent: c.Request().UserAgent(), 464 + ConnectedAt: time.Now(), 465 + } 466 + sentCounter := eventsSentCounter.WithLabelValues(consumer.RemoteAddr, consumer.UserAgent) 467 + consumer.EventsSent = sentCounter 468 + 469 + consumerID := s.registerConsumer(&consumer) 470 + defer s.cleanupConsumer(consumerID) 471 + 472 + s.log.Info("new consumer", 473 + "remote_addr", consumer.RemoteAddr, 474 + "user_agent", consumer.UserAgent, 475 + "cursor", since, 476 + "consumer_id", consumerID, 477 + ) 478 + activeClientGauge.Inc() 479 + defer activeClientGauge.Dec() 480 + 481 + for { 482 + select { 483 + case evt, ok := <-evts: 484 + if !ok { 485 + s.log.Error("event stream closed unexpectedly") 486 + return nil 487 + } 488 + 489 + wc, err := conn.NextWriter(websocket.BinaryMessage) 490 + if err != nil { 491 + s.log.Error("failed to get next writer", "err", err) 492 + return err 493 + } 494 + 495 + if evt.Preserialized != nil { 496 + _, err = wc.Write(evt.Preserialized) 497 + } else { 498 + err = evt.Serialize(wc) 499 + } 500 + if err != nil { 501 + return fmt.Errorf("failed to write event: %w", err) 502 + } 503 + 504 + if err := wc.Close(); err != nil { 505 + s.log.Warn("failed to flush-close our event write", "err", err) 506 + return nil 507 + } 508 + 509 + lastWriteLk.Lock() 510 + lastWrite = time.Now() 511 + lastWriteLk.Unlock() 512 + sentCounter.Inc() 513 + case <-ctx.Done(): 514 + return nil 515 + } 516 + } 517 + } 518 + 519 + type SocketConsumer struct { 520 + UserAgent string 521 + RemoteAddr string 522 + ConnectedAt time.Time 523 + EventsSent promclient.Counter 524 + } 525 + 526 + func (s *Splitter) registerConsumer(c *SocketConsumer) uint64 { 527 + s.consumersLk.Lock() 528 + defer s.consumersLk.Unlock() 529 + 530 + id := s.nextConsumerID 531 + s.nextConsumerID++ 532 + 533 + s.consumers[id] = c 534 + 535 + return id 536 + } 537 + 538 + func (s *Splitter) cleanupConsumer(id uint64) { 539 + s.consumersLk.Lock() 540 + defer s.consumersLk.Unlock() 541 + 542 + c := s.consumers[id] 543 + 544 + var m = &dto.Metric{} 545 + if err := c.EventsSent.Write(m); err != nil { 546 + s.log.Error("failed to get sent counter", "err", err) 547 + } 548 + 549 + s.log.Info("consumer disconnected", 550 + "consumer_id", id, 551 + "remote_addr", c.RemoteAddr, 552 + "user_agent", c.UserAgent, 553 + "events_sent", m.Counter.GetValue()) 554 + 555 + delete(s.consumers, id) 556 + } 557 + 558 + func sleepForBackoff(b int) time.Duration { 559 + if b == 0 { 560 + return 0 561 + } 562 + 563 + if b < 50 { 564 + return time.Millisecond * time.Duration(rand.Intn(100)+(5*b)) 565 + } 566 + 567 + return time.Second * 5 568 + } 569 + 570 + func (s *Splitter) subscribeWithRedialer(ctx context.Context, host string, cursor int64) { 571 + d := websocket.Dialer{} 572 + 573 + protocol := "wss" 574 + 575 + var backoff int 576 + for { 577 + select { 578 + case <-ctx.Done(): 579 + return 580 + default: 581 + } 582 + 583 + header := 
http.Header{ 584 + "User-Agent": []string{"bgs-rainbow-v0"}, 585 + } 586 + 587 + var url string 588 + if cursor < 0 { 589 + url = fmt.Sprintf("%s://%s/xrpc/com.atproto.sync.subscribeRepos", protocol, host) 590 + } else { 591 + url = fmt.Sprintf("%s://%s/xrpc/com.atproto.sync.subscribeRepos?cursor=%d", protocol, host, cursor) 592 + } 593 + con, res, err := d.DialContext(ctx, url, header) 594 + if err != nil { 595 + s.log.Warn("dialing failed", "host", host, "err", err, "backoff", backoff) 596 + time.Sleep(sleepForBackoff(backoff)) 597 + backoff++ 598 + 599 + continue 600 + } 601 + 602 + s.log.Info("event subscription response", "code", res.StatusCode) 603 + 604 + if err := s.handleConnection(ctx, host, con, &cursor); err != nil { 605 + s.log.Warn("connection failed", "host", host, "err", err) 606 + } 607 + } 608 + } 609 + 610 + func (s *Splitter) handleConnection(ctx context.Context, host string, con *websocket.Conn, lastCursor *int64) error { 611 + ctx, cancel := context.WithCancel(ctx) 612 + defer cancel() 613 + 614 + sched := sequential.NewScheduler("splitter", func(ctx context.Context, evt *events.XRPCStreamEvent) error { 615 + seq := events.SequenceForEvent(evt) 616 + if seq < 0 { 617 + // ignore info events and other unsupported types 618 + return nil 619 + } 620 + 621 + if err := s.events.AddEvent(ctx, evt); err != nil { 622 + return err 623 + } 624 + 625 + if seq%5000 == 0 { 626 + // TODO: don't need this after we move to getting seq from pebble 627 + if err := s.writeCursor(seq); err != nil { 628 + s.log.Error("write cursor failed", "err", err) 629 + } 630 + } 631 + 632 + *lastCursor = seq 633 + return nil 634 + }) 635 + 636 + return events.HandleRepoStream(ctx, con, sched, nil) 637 + } 638 + 639 + func (s *Splitter) getLastCursor() (int64, error) { 640 + if s.pp != nil { 641 + seq, millis, _, err := s.pp.GetLast(context.Background()) 642 + if err == nil { 643 + s.log.Debug("got last cursor from pebble", "seq", seq, "millis", millis) 644 + return seq, nil 645 + } else if errors.Is(err, events.ErrNoLast) { 646 + s.log.Info("pebble no last") 647 + } else { 648 + s.log.Error("pebble seq fail", "err", err) 649 + } 650 + } 651 + 652 + fi, err := os.Open(s.conf.CursorFile) 653 + if err != nil { 654 + if os.IsNotExist(err) { 655 + return -1, nil 656 + } 657 + return -1, err 658 + } 659 + 660 + b, err := io.ReadAll(fi) 661 + if err != nil { 662 + return -1, err 663 + } 664 + 665 + v, err := strconv.ParseInt(string(b), 10, 64) 666 + if err != nil { 667 + return -1, err 668 + } 669 + 670 + return v, nil 671 + } 672 + 673 + func (s *Splitter) writeCursor(curs int64) error { 674 + return os.WriteFile(s.conf.CursorFile, []byte(fmt.Sprint(curs)), 0664) 675 + }
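Note: taken together, the splitter dials the upstream relay firehose via subscribeWithRedialer, persists each event through either the in-memory ring buffer or the Pebble-backed persister, and re-serves the stream to downstream consumers on /xrpc/com.atproto.sync.subscribeRepos, forwarding requestCrawl calls to any configured next-crawler relays. A rough bootstrap sketch follows; the real entry point is cmd/rainbow, and the upstream host, listen addresses, and paths below are placeholders.

package main

import (
	"log"
	"time"

	"github.com/bluesky-social/indigo/events"
	"github.com/bluesky-social/indigo/splitter"
)

// Illustrative only: boot a Pebble-backed splitter instance.
func main() {
	conf := splitter.SplitterConfig{
		UpstreamHost: "bsky.network",
		CursorFile:   "cursor-file",
		PebbleOptions: &events.PebblePersistOptions{
			DbPath:          "./rainbow-pebble",
			PersistDuration: 24 * time.Hour,
			GCPeriod:        5 * time.Minute,
			MaxBytes:        8 << 30, // ~8 GiB of retained events
		},
	}

	spl, err := splitter.NewSplitter(conf, nil)
	if err != nil {
		log.Fatal(err)
	}

	go func() {
		if err := spl.StartMetrics(":8053"); err != nil {
			log.Fatal(err)
		}
	}()

	// Blocks: subscribes upstream and serves consumers on the given address.
	if err := spl.Start(":2480"); err != nil {
		log.Fatal(err)
	}
}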
+37 -12
testing/integ_test.go
··· 15 15 "github.com/bluesky-social/indigo/repo" 16 16 "github.com/bluesky-social/indigo/xrpc" 17 17 "github.com/ipfs/go-cid" 18 - "github.com/ipfs/go-log/v2" 19 18 car "github.com/ipld/go-car" 20 19 "github.com/stretchr/testify/assert" 21 20 ) 22 21 23 - func init() { 24 - log.SetAllLoggers(log.LevelInfo) 22 + func TestRelayBasic(t *testing.T) { 23 + t.Helper() 24 + testRelayBasic(t, true) 25 + } 26 + 27 + func TestRelayBasicNonArchive(t *testing.T) { 28 + t.Helper() 29 + testRelayBasic(t, false) 25 30 } 26 31 27 - func TestRelayBasic(t *testing.T) { 32 + func testRelayBasic(t *testing.T, archive bool) { 28 33 if testing.Short() { 29 34 t.Skip("skipping Relay test in 'short' test mode") 30 35 } ··· 33 38 p1 := MustSetupPDS(t, ".tpds", didr) 34 39 p1.Run(t) 35 40 36 - b1 := MustSetupRelay(t, didr) 41 + b1 := MustSetupRelay(t, didr, archive) 37 42 b1.Run(t) 38 43 39 44 b1.tr.TrialHosts = []string{p1.RawHost()} ··· 116 121 } 117 122 118 123 func TestRelayMultiPDS(t *testing.T) { 124 + t.Helper() 125 + testRelayMultiPDS(t, true) 126 + } 127 + 128 + func TestRelayMultiPDSNonArchive(t *testing.T) { 129 + t.Helper() 130 + testRelayMultiPDS(t, false) 131 + } 132 + 133 + func testRelayMultiPDS(t *testing.T, archive bool) { 119 134 if testing.Short() { 120 135 t.Skip("skipping Relay test in 'short' test mode") 121 136 } ··· 130 145 p2 := MustSetupPDS(t, ".pdsdos", didr) 131 146 p2.Run(t) 132 147 133 - b1 := MustSetupRelay(t, didr) 148 + b1 := MustSetupRelay(t, didr, archive) 134 149 b1.Run(t) 135 150 136 151 b1.tr.TrialHosts = []string{p1.RawHost(), p2.RawHost()} ··· 198 213 p2 := MustSetupPDS(t, ".pdsdos", didr) 199 214 p2.Run(t) 200 215 201 - b1 := MustSetupRelay(t, didr) 216 + b1 := MustSetupRelay(t, didr, true) 202 217 b1.Run(t) 203 218 204 219 b1.tr.TrialHosts = []string{p1.RawHost(), p2.RawHost()} ··· 256 271 p1 := MustSetupPDS(t, ".pdsuno", didr) 257 272 p1.Run(t) 258 273 259 - b1 := MustSetupRelay(t, didr) 274 + b1 := MustSetupRelay(t, didr, true) 260 275 b1.Run(t) 261 276 262 277 b1.tr.TrialHosts = []string{p1.RawHost()} ··· 293 308 p1 := MustSetupPDS(t, ".pdsuno", didr) 294 309 p1.Run(t) 295 310 296 - b1 := MustSetupRelay(t, didr) 311 + b1 := MustSetupRelay(t, didr, true) 297 312 b1.Run(t) 298 313 299 314 b1.tr.TrialHosts = []string{p1.RawHost()} ··· 391 406 } 392 407 393 408 func TestRelayTakedown(t *testing.T) { 409 + testRelayTakedown(t, true) 410 + } 411 + 412 + func TestRelayTakedownNonArchive(t *testing.T) { 413 + testRelayTakedown(t, false) 414 + } 415 + 416 + func testRelayTakedown(t *testing.T, archive bool) { 394 417 if testing.Short() { 395 418 t.Skip("skipping Relay test in 'short' test mode") 396 419 } ··· 401 424 p1 := MustSetupPDS(t, ".tpds", didr) 402 425 p1.Run(t) 403 426 404 - b1 := MustSetupRelay(t, didr) 427 + b1 := MustSetupRelay(t, didr, true) 405 428 b1.Run(t) 406 429 407 430 b1.tr.TrialHosts = []string{p1.RawHost()} ··· 480 503 } 481 504 didr := TestPLC(t) 482 505 483 - b1 := MustSetupRelay(t, didr) 506 + b1 := MustSetupRelay(t, didr, true) 484 507 b1.Run(t) 485 508 486 509 b1.BanDomain(t, "foo.com") ··· 523 546 p1 := MustSetupPDS(t, ".tpds", didr) 524 547 p1.Run(t) 525 548 526 - b1 := MustSetupRelay(t, didr) 549 + b1 := MustSetupRelay(t, didr, true) 527 550 b1.Run(t) 528 551 529 552 b1.tr.TrialHosts = []string{p1.RawHost()} ··· 541 564 e1 := evts.Next() 542 565 assert.NotNil(e1.RepoCommit) 543 566 assert.Equal(e1.RepoCommit.Repo, bob.DID()) 567 + fmt.Println(e1.RepoCommit.Ops[0]) 544 568 545 569 ctx := context.TODO() 546 570 rm := p1.server.Repoman() ··· 549 573 
} 550 574 551 575 e2 := evts.Next() 576 + //fmt.Println(e2.RepoCommit.Ops[0]) 552 577 assert.Equal(len(e2.RepoCommit.Ops), 0) 553 578 assert.Equal(e2.RepoCommit.Repo, bob.DID()) 554 579 }
+19 -9
testing/utils.go
··· 117 117 return nil, err 118 118 } 119 119 120 - cs, err := carstore.NewCarStore(cardb, cspath) 120 + cs, err := carstore.NewCarStore(cardb, []string{cspath}) 121 121 if err != nil { 122 122 return nil, err 123 123 } ··· 471 471 t.Helper() 472 472 473 473 ctx := context.TODO() 474 - resp, err := bsky.NotificationListNotifications(ctx, u.client, "", 100, false, "") 474 + resp, err := bsky.NotificationListNotifications(ctx, u.client, "", 100, false, nil, "") 475 475 if err != nil { 476 476 t.Fatal(err) 477 477 } ··· 518 518 return t.listener.Addr().String() 519 519 } 520 520 521 - func MustSetupRelay(t *testing.T, didr plc.PLCClient) *TestRelay { 521 + func MustSetupRelay(t *testing.T, didr plc.PLCClient, archive bool) *TestRelay { 522 522 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 523 523 defer cancel() 524 - tbgs, err := SetupRelay(ctx, didr) 524 + tbgs, err := SetupRelay(ctx, didr, archive) 525 525 if err != nil { 526 526 t.Fatal(err) 527 527 } ··· 529 529 return tbgs 530 530 } 531 531 532 - func SetupRelay(ctx context.Context, didr plc.PLCClient) (*TestRelay, error) { 532 + func SetupRelay(ctx context.Context, didr plc.PLCClient, archive bool) (*TestRelay, error) { 533 533 dir, err := os.MkdirTemp("", "integtest") 534 534 if err != nil { 535 535 return nil, err ··· 550 550 return nil, err 551 551 } 552 552 553 - cs, err := carstore.NewCarStore(cardb, cspath) 554 - if err != nil { 555 - return nil, err 553 + var cs carstore.CarStore 554 + if archive { 555 + arccs, err := carstore.NewCarStore(cardb, []string{cspath}) 556 + if err != nil { 557 + return nil, err 558 + } 559 + cs = arccs 560 + } else { 561 + nacs, err := carstore.NewNonArchivalCarstore(cardb) 562 + if err != nil { 563 + return nil, err 564 + } 565 + cs = nacs 556 566 } 557 567 558 568 //kmgr := indexer.NewKeyManager(didr, nil) ··· 691 701 }, 692 702 } 693 703 seqScheduler := sequential.NewScheduler("test", rsc.EventHandler) 694 - if err := events.HandleRepoStream(ctx, con, seqScheduler); err != nil { 704 + if err := events.HandleRepoStream(ctx, con, seqScheduler, nil); err != nil { 695 705 fmt.Println(err) 696 706 } 697 707 }()
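Note: the relay test helpers now take an archive flag so the suite exercises both carstore backends: the archival store (full CAR shards on disk, constructor now takes a slice of shard directories) and the non-archival store (metadata DB only). A hypothetical helper condensing the branch SetupRelay performs, shown as if it lived next to utils.go; newTestCarstore is not part of this change.

package testing

import (
	"github.com/bluesky-social/indigo/carstore"
	"gorm.io/gorm"
)

// Illustrative only: choose a carstore backend the way SetupRelay does.
func newTestCarstore(cardb *gorm.DB, cspath string, archive bool) (carstore.CarStore, error) {
	if archive {
		// archival: CAR shards written under the given directory
		return carstore.NewCarStore(cardb, []string{cspath})
	}
	// non-archival: only commit metadata is retained
	return carstore.NewNonArchivalCarstore(cardb)
}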
+35
util/cliutil/ipfslog.go
··· 1 + package cliutil 2 + 3 + import ( 4 + "io" 5 + 6 + ipfslog "github.com/ipfs/go-log/v2" 7 + "go.uber.org/zap/zapcore" 8 + ) 9 + 10 + func SetIpfsWriter(out io.Writer, format string, level string) { 11 + var ze zapcore.Encoder 12 + switch format { 13 + case "json": 14 + ze = zapcore.NewJSONEncoder(zapcore.EncoderConfig{}) 15 + case "text": 16 + ze = zapcore.NewConsoleEncoder(zapcore.EncoderConfig{}) 17 + default: 18 + ze = zapcore.NewConsoleEncoder(zapcore.EncoderConfig{}) 19 + } 20 + var zl zapcore.LevelEnabler 21 + switch level { 22 + case "debug": 23 + zl = zapcore.DebugLevel 24 + case "info": 25 + zl = zapcore.InfoLevel 26 + case "warn": 27 + zl = zapcore.WarnLevel 28 + case "error": 29 + zl = zapcore.ErrorLevel 30 + default: 31 + zl = zapcore.InfoLevel 32 + } 33 + nc := zapcore.NewCore(ze, zapcore.AddSync(out), zl) 34 + ipfslog.SetPrimaryCore(nc) 35 + }
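Note: SetIpfsWriter points the go-log (ipfs) primary zap core at the same writer, format, and level as the main logger, so dependencies that still log through go-log land in the one stream; unrecognized format or level values fall back to console output at info. The SetupSlog added in util/cliutil/util.go below already calls it, so direct use is only needed for custom wiring, e.g. this small sketch:

package main

import (
	"os"

	"github.com/bluesky-social/indigo/util/cliutil"
)

// Illustrative only: send go-log output to stderr as JSON at warn level.
func main() {
	cliutil.SetIpfsWriter(os.Stderr, "json", "warn")
}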
+349
util/cliutil/util.go
··· 2 2 3 3 import ( 4 4 "encoding/json" 5 + "errors" 5 6 "fmt" 7 + "io" 8 + "io/fs" 9 + "log/slog" 6 10 "net/http" 7 11 "os" 8 12 "path/filepath" 13 + "regexp" 14 + "sort" 15 + "strconv" 9 16 "strings" 10 17 "time" 11 18 ··· 230 237 231 238 return db, nil 232 239 } 240 + 241 + type LogOptions struct { 242 + // e.g. 1_000_000_000 243 + LogRotateBytes int64 244 + 245 + // path to write to, if rotating, %T gets UnixMilli at file open time 246 + // NOTE: substitution is simple replace("%T", "") 247 + LogPath string 248 + 249 + // text|json 250 + LogFormat string 251 + 252 + // info|debug|warn|error 253 + LogLevel string 254 + 255 + // Keep N old logs (not including current); <0 disables removal, 0==remove all old log files immediately 256 + KeepOld int 257 + } 258 + 259 + func firstenv(env_var_names ...string) string { 260 + for _, env_var_name := range env_var_names { 261 + val := os.Getenv(env_var_name) 262 + if val != "" { 263 + return val 264 + } 265 + } 266 + return "" 267 + } 268 + 269 + // SetupSlog integrates passed in options and env vars. 270 + // 271 + // passing default cliutil.LogOptions{} is ok. 272 + // 273 + // BSKYLOG_LOG_LEVEL=info|debug|warn|error 274 + // 275 + // BSKYLOG_LOG_FMT=text|json 276 + // 277 + // BSKYLOG_FILE=path (or "-" or "" for stdout), %T gets UnixMilli; if a path with '/', {prefix}/current becomes a link to active log file 278 + // 279 + // BSKYLOG_ROTATE_BYTES=int maximum size of log chunk before rotating 280 + // 281 + // BSKYLOG_ROTATE_KEEP=int keep N olg logs (not including current) 282 + // 283 + // The env vars were derived from ipfs logging library, and also respond to some GOLOG_ vars from that library, 284 + // but BSKYLOG_ variables are preferred because imported code still using the ipfs log library may misbehave 285 + // if some GOLOG values are set, especially GOLOG_FILE. 
286 + func SetupSlog(options LogOptions) (*slog.Logger, error) { 287 + fmt.Fprintf(os.Stderr, "SetupSlog\n") 288 + var hopts slog.HandlerOptions 289 + hopts.Level = slog.LevelInfo 290 + hopts.AddSource = true 291 + if options.LogLevel == "" { 292 + options.LogLevel = firstenv("BSKYLOG_LOG_LEVEL", "GOLOG_LOG_LEVEL") 293 + } 294 + if options.LogLevel == "" { 295 + hopts.Level = slog.LevelInfo 296 + options.LogLevel = "info" 297 + } else { 298 + level := strings.ToLower(options.LogLevel) 299 + switch level { 300 + case "debug": 301 + hopts.Level = slog.LevelDebug 302 + case "info": 303 + hopts.Level = slog.LevelInfo 304 + case "warn": 305 + hopts.Level = slog.LevelWarn 306 + case "error": 307 + hopts.Level = slog.LevelError 308 + default: 309 + return nil, fmt.Errorf("unknown log level: %#v", options.LogLevel) 310 + } 311 + } 312 + if options.LogFormat == "" { 313 + options.LogFormat = firstenv("BSKYLOG_LOG_FMT", "GOLOG_LOG_FMT") 314 + } 315 + if options.LogFormat == "" { 316 + options.LogFormat = "text" 317 + } else { 318 + format := strings.ToLower(options.LogFormat) 319 + if format == "json" || format == "text" { 320 + // ok 321 + } else { 322 + return nil, fmt.Errorf("invalid log format: %#v", options.LogFormat) 323 + } 324 + options.LogFormat = format 325 + } 326 + 327 + if options.LogPath == "" { 328 + options.LogPath = firstenv("BSKYLOG_FILE", "GOLOG_FILE") 329 + } 330 + if options.LogRotateBytes == 0 { 331 + rotateBytesStr := os.Getenv("BSKYLOG_ROTATE_BYTES") // no GOLOG equivalent 332 + if rotateBytesStr != "" { 333 + rotateBytes, err := strconv.ParseInt(rotateBytesStr, 10, 64) 334 + if err != nil { 335 + return nil, fmt.Errorf("invalid BSKYLOG_ROTATE_BYTES value: %w", err) 336 + } 337 + options.LogRotateBytes = rotateBytes 338 + } 339 + } 340 + if options.KeepOld == 0 { 341 + keepOldUnset := true 342 + keepOldStr := os.Getenv("BSKYLOG_ROTATE_KEEP") // no GOLOG equivalent 343 + if keepOldStr != "" { 344 + keepOld, err := strconv.ParseInt(keepOldStr, 10, 64) 345 + if err != nil { 346 + return nil, fmt.Errorf("invalid BSKYLOG_ROTATE_KEEP value: %w", err) 347 + } 348 + keepOldUnset = false 349 + options.KeepOld = int(keepOld) 350 + } 351 + if keepOldUnset { 352 + options.KeepOld = 2 353 + } 354 + } 355 + logaround := make(chan string, 100) 356 + go logbouncer(logaround) 357 + var out io.Writer 358 + if (options.LogPath == "") || (options.LogPath == "-") { 359 + out = os.Stdout 360 + } else if options.LogRotateBytes != 0 { 361 + out = &logRotateWriter{ 362 + rotateBytes: options.LogRotateBytes, 363 + outPathTemplate: options.LogPath, 364 + keep: options.KeepOld, 365 + logaround: logaround, 366 + } 367 + } else { 368 + var err error 369 + out, err = os.Create(options.LogPath) 370 + if err != nil { 371 + return nil, fmt.Errorf("%s: %w", options.LogPath, err) 372 + } 373 + fmt.Fprintf(os.Stderr, "SetupSlog create %#v\n", options.LogPath) 374 + } 375 + var handler slog.Handler 376 + switch options.LogFormat { 377 + case "text": 378 + handler = slog.NewTextHandler(out, &hopts) 379 + case "json": 380 + handler = slog.NewJSONHandler(out, &hopts) 381 + default: 382 + return nil, fmt.Errorf("unknown log format: %#v", options.LogFormat) 383 + } 384 + logger := slog.New(handler) 385 + slog.SetDefault(logger) 386 + templateDirPart, _ := filepath.Split(options.LogPath) 387 + ents, _ := os.ReadDir(templateDirPart) 388 + for _, ent := range ents { 389 + fmt.Fprintf(os.Stdout, "%s\n", filepath.Join(templateDirPart, ent.Name())) 390 + } 391 + SetIpfsWriter(out, options.LogFormat, options.LogLevel) 392 + 
return logger, nil 393 + } 394 + 395 + type logRotateWriter struct { 396 + currentWriter io.WriteCloser 397 + 398 + // how much has been written to current log file 399 + currentBytes int64 400 + 401 + // e.g. path/to/logs/foo%T 402 + currentPath string 403 + 404 + // e.g. path/to/logs/current 405 + currentPathCurrent string 406 + 407 + rotateBytes int64 408 + 409 + outPathTemplate string 410 + 411 + // keep the most recent N log files (not including current) 412 + keep int 413 + 414 + // write strings to this from inside the log system, a task outside the log system hands them to slog.Info() 415 + logaround chan<- string 416 + } 417 + 418 + func logbouncer(out <-chan string) { 419 + var logger *slog.Logger 420 + for line := range out { 421 + fmt.Fprintf(os.Stderr, "ll %s\n", line) 422 + if logger == nil { 423 + // lazy to make sure it crops up after slog Default has been set 424 + logger = slog.Default().With("system", "logging") 425 + } 426 + logger.Info(line) 427 + } 428 + } 429 + 430 + var currentMatcher = regexp.MustCompile("current_\\d+") 431 + 432 + func (w *logRotateWriter) cleanOldLogs() { 433 + if w.keep < 0 { 434 + // old log removal is disabled 435 + return 436 + } 437 + // w.currentPath was recently set as the new log 438 + dirpart, _ := filepath.Split(w.currentPath) 439 + // find old logs 440 + templateDirPart, templateNamePart := filepath.Split(w.outPathTemplate) 441 + if dirpart != templateDirPart { 442 + w.logaround <- fmt.Sprintf("current dir part %#v != template dir part %#v\n", w.currentPath, w.outPathTemplate) 443 + return 444 + } 445 + // build a regexp that is string literal parts with \d+ replacing the UnixMilli part 446 + templateNameParts := strings.Split(templateNamePart, "%T") 447 + var sb strings.Builder 448 + first := true 449 + for _, part := range templateNameParts { 450 + if first { 451 + first = false 452 + } else { 453 + sb.WriteString("\\d+") 454 + } 455 + sb.WriteString(regexp.QuoteMeta(part)) 456 + } 457 + tmre, err := regexp.Compile(sb.String()) 458 + if err != nil { 459 + w.logaround <- fmt.Sprintf("failed to compile old log template regexp: %#v\n", err) 460 + return 461 + } 462 + dir, err := os.ReadDir(dirpart) 463 + if err != nil { 464 + w.logaround <- fmt.Sprintf("failed to read old log template dir: %#v\n", err) 465 + return 466 + } 467 + var found []fs.FileInfo 468 + for _, ent := range dir { 469 + name := ent.Name() 470 + if tmre.MatchString(name) || currentMatcher.MatchString(name) { 471 + fi, err := ent.Info() 472 + if err != nil { 473 + continue 474 + } 475 + found = append(found, fi) 476 + } 477 + } 478 + if len(found) <= w.keep { 479 + // not too many, nothing to do 480 + return 481 + } 482 + foundMtimeLess := func(i, j int) bool { 483 + return found[i].ModTime().Before(found[j].ModTime()) 484 + } 485 + sort.Slice(found, foundMtimeLess) 486 + drops := found[:len(found)-w.keep] 487 + for _, fi := range drops { 488 + fullpath := filepath.Join(dirpart, fi.Name()) 489 + err = os.Remove(fullpath) 490 + if err != nil { 491 + w.logaround <- fmt.Sprintf("failed to rm old log: %#v\n", err) 492 + // but keep going 493 + } 494 + // maybe it would be safe to debug-log old log removal from within the logging infrastructure? 
495 + } 496 + } 497 + 498 + func (w *logRotateWriter) closeOldLog() []error { 499 + if w.currentWriter == nil { 500 + return nil 501 + } 502 + var earlyWeakErrors []error 503 + err := w.currentWriter.Close() 504 + if err != nil { 505 + earlyWeakErrors = append(earlyWeakErrors, err) 506 + } 507 + w.currentWriter = nil 508 + w.currentBytes = 0 509 + w.currentPath = "" 510 + if w.currentPathCurrent != "" { 511 + err = os.Remove(w.currentPathCurrent) // not really an error until something else goes wrong 512 + if err != nil { 513 + earlyWeakErrors = append(earlyWeakErrors, err) 514 + } 515 + w.currentPathCurrent = "" 516 + } 517 + return earlyWeakErrors 518 + } 519 + 520 + func (w *logRotateWriter) openNewLog(earlyWeakErrors []error) (badErr error, weakErrors []error) { 521 + nowMillis := time.Now().UnixMilli() 522 + nows := strconv.FormatInt(nowMillis, 10) 523 + w.currentPath = strings.Replace(w.outPathTemplate, "%T", nows, -1) 524 + var err error 525 + w.currentWriter, err = os.Create(w.currentPath) 526 + if err != nil { 527 + earlyWeakErrors = append(earlyWeakErrors, err) 528 + return errors.Join(earlyWeakErrors...), nil 529 + } 530 + w.logaround <- fmt.Sprintf("new log file %#v", w.currentPath) 531 + w.cleanOldLogs() 532 + dirpart, _ := filepath.Split(w.currentPath) 533 + if dirpart != "" { 534 + w.currentPathCurrent = filepath.Join(dirpart, "current") 535 + fi, err := os.Stat(w.currentPathCurrent) 536 + if err == nil && fi.Mode().IsRegular() { 537 + // move aside unknown "current" from a previous run 538 + // see also currentMatcher regexp current_\d+ 539 + err = os.Rename(w.currentPathCurrent, w.currentPathCurrent+"_"+nows) 540 + if err != nil { 541 + // not crucial if we can't move aside "current" 542 + // TODO: log warning ... but not from inside log writer? 543 + earlyWeakErrors = append(earlyWeakErrors, err) 544 + } 545 + } 546 + err = os.Link(w.currentPath, w.currentPathCurrent) 547 + if err != nil { 548 + // not crucial if we can't make "current" link 549 + // TODO: log warning ... but not from inside log writer? 550 + earlyWeakErrors = append(earlyWeakErrors, err) 551 + } 552 + } 553 + return nil, earlyWeakErrors 554 + } 555 + 556 + func (w *logRotateWriter) Write(p []byte) (n int, err error) { 557 + var earlyWeakErrors []error 558 + if int64(len(p))+w.currentBytes > w.rotateBytes { 559 + // next write would be over the limit 560 + earlyWeakErrors = w.closeOldLog() 561 + } 562 + if w.currentWriter == nil { 563 + // start new log file 564 + var err error 565 + err, earlyWeakErrors = w.openNewLog(earlyWeakErrors) 566 + if err != nil { 567 + return 0, err 568 + } 569 + } 570 + var wrote int 571 + wrote, err = w.currentWriter.Write(p) 572 + w.currentBytes += int64(wrote) 573 + if err != nil { 574 + earlyWeakErrors = append(earlyWeakErrors, err) 575 + return wrote, errors.Join(earlyWeakErrors...) 576 + } 577 + if earlyWeakErrors != nil { 578 + w.logaround <- fmt.Sprintf("ok, but: %s", errors.Join(earlyWeakErrors...).Error()) 579 + } 580 + return wrote, nil 581 + }
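Note: putting SetupSlog to use: unset LogOptions fields fall back to the BSKYLOG_* (or legacy GOLOG_*) environment variables, "%T" in the path is replaced with the UnixMilli timestamp at file-open time, and when size rotation is enabled a "current" hard link in the log directory tracks the active file while KeepOld bounds how many rotated files are retained. A hedged sketch; the path and sizes below are placeholders.

package main

import (
	"log/slog"

	"github.com/bluesky-social/indigo/util/cliutil"
)

// Illustrative only: file-based logging with size rotation.
func main() {
	logger, err := cliutil.SetupSlog(cliutil.LogOptions{
		LogPath:        "/var/log/rainbow/rainbow_%T.log",
		LogFormat:      "json",
		LogLevel:       "info",
		LogRotateBytes: 1_000_000_000, // ~1 GB per file before rotating
		KeepOld:        2,             // retain two rotated files
	})
	if err != nil {
		panic(err)
	}
	logger.Info("logging configured")
	slog.Info("the default slog logger is redirected too")
}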