+52 .github/workflows/container-rainbow-aws.yaml
···
+ name: container-rainbow-aws
+ on: [push]
+ env:
+   REGISTRY: ${{ secrets.AWS_ECR_REGISTRY_USEAST2_PACKAGES_REGISTRY }}
+   USERNAME: ${{ secrets.AWS_ECR_REGISTRY_USEAST2_PACKAGES_USERNAME }}
+   PASSWORD: ${{ secrets.AWS_ECR_REGISTRY_USEAST2_PACKAGES_PASSWORD }}
+   # github.repository as <account>/<repo>
+   IMAGE_NAME: rainbow
+
+ jobs:
+   container-rainbow-aws:
+     if: github.repository == 'bluesky-social/indigo'
+     runs-on: ubuntu-latest
+     permissions:
+       contents: read
+       packages: write
+       id-token: write
+
+     steps:
+       - name: Checkout repository
+         uses: actions/checkout@v3
+
+       - name: Setup Docker buildx
+         uses: docker/setup-buildx-action@v1
+
+       - name: Log into registry ${{ env.REGISTRY }}
+         uses: docker/login-action@v2
+         with:
+           registry: ${{ env.REGISTRY }}
+           username: ${{ env.USERNAME }}
+           password: ${{ env.PASSWORD }}
+
+       - name: Extract Docker metadata
+         id: meta
+         uses: docker/metadata-action@v4
+         with:
+           images: |
+             ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+           tags: |
+             type=sha,enable=true,priority=100,prefix=,suffix=,format=long
+
+       - name: Build and push Docker image
+         id: build-and-push
+         uses: docker/build-push-action@v4
+         with:
+           context: .
+           file: ./cmd/rainbow/Dockerfile
+           push: ${{ github.event_name != 'pull_request' }}
+           tags: ${{ steps.meta.outputs.tags }}
+           labels: ${{ steps.meta.outputs.labels }}
+           cache-from: type=gha
+           cache-to: type=gha,mode=max
+2 -2 .github/workflows/golang.yml
···
    - name: Set up Go tooling
      uses: actions/setup-go@v4
      with:
-       go-version: "1.22"
+       go-version: "1.23"
    - name: Build
      run: make build
    - name: Test
···
    - name: Set up Go tooling
      uses: actions/setup-go@v4
      with:
-       go-version: "1.22"
+       go-version: "1.23"
    - name: Lint
      run: make lint
+1 HACKING.md
+4 api/agnostic/doc.go
+189 api/agnostic/repoapplyWrites.go
···
+ // Copied from indigo:api/atproto/repoapplyWrites.go
+
+ package agnostic
+
+ // schema: com.atproto.repo.applyWrites
+
+ import (
+     "context"
+     "encoding/json"
+     "fmt"
+
+     "github.com/bluesky-social/indigo/lex/util"
+     "github.com/bluesky-social/indigo/xrpc"
+ )
+
+ // RepoApplyWrites_Create is a "create" in the com.atproto.repo.applyWrites schema.
+ //
+ // Operation which creates a new record.
+ //
+ // RECORDTYPE: RepoApplyWrites_Create
+ type RepoApplyWrites_Create struct {
+     LexiconTypeID string `json:"$type,const=com.atproto.repo.applyWrites#create" cborgen:"$type,const=com.atproto.repo.applyWrites#create"`
+     Collection string `json:"collection" cborgen:"collection"`
+     Rkey *string `json:"rkey,omitempty" cborgen:"rkey,omitempty"`
+     Value *json.RawMessage `json:"value" cborgen:"value"`
+ }
+
+ // RepoApplyWrites_CreateResult is a "createResult" in the com.atproto.repo.applyWrites schema.
+ //
+ // RECORDTYPE: RepoApplyWrites_CreateResult
+ type RepoApplyWrites_CreateResult struct {
+     LexiconTypeID string `json:"$type,const=com.atproto.repo.applyWrites#createResult" cborgen:"$type,const=com.atproto.repo.applyWrites#createResult"`
+     Cid string `json:"cid" cborgen:"cid"`
+     Uri string `json:"uri" cborgen:"uri"`
+     ValidationStatus *string `json:"validationStatus,omitempty" cborgen:"validationStatus,omitempty"`
+ }
+
+ // RepoApplyWrites_Delete is a "delete" in the com.atproto.repo.applyWrites schema.
+ //
+ // Operation which deletes an existing record.
+ //
+ // RECORDTYPE: RepoApplyWrites_Delete
+ type RepoApplyWrites_Delete struct {
+     LexiconTypeID string `json:"$type,const=com.atproto.repo.applyWrites#delete" cborgen:"$type,const=com.atproto.repo.applyWrites#delete"`
+     Collection string `json:"collection" cborgen:"collection"`
+     Rkey string `json:"rkey" cborgen:"rkey"`
+ }
+
+ // RepoApplyWrites_DeleteResult is a "deleteResult" in the com.atproto.repo.applyWrites schema.
+ //
+ // RECORDTYPE: RepoApplyWrites_DeleteResult
+ type RepoApplyWrites_DeleteResult struct {
+     LexiconTypeID string `json:"$type,const=com.atproto.repo.applyWrites#deleteResult" cborgen:"$type,const=com.atproto.repo.applyWrites#deleteResult"`
+ }
+
+ // RepoApplyWrites_Input is the input argument to a com.atproto.repo.applyWrites call.
+ type RepoApplyWrites_Input struct {
+     // repo: The handle or DID of the repo (aka, current account).
+     Repo string `json:"repo" cborgen:"repo"`
+     // swapCommit: If provided, the entire operation will fail if the current repo commit CID does not match this value. Used to prevent conflicting repo mutations.
+     SwapCommit *string `json:"swapCommit,omitempty" cborgen:"swapCommit,omitempty"`
+     // validate: Can be set to 'false' to skip Lexicon schema validation of record data across all operations, 'true' to require it, or leave unset to validate only for known Lexicons.
+     Validate *bool `json:"validate,omitempty" cborgen:"validate,omitempty"`
+     Writes []*RepoApplyWrites_Input_Writes_Elem `json:"writes" cborgen:"writes"`
+ }
+
+ type RepoApplyWrites_Input_Writes_Elem struct {
+     RepoApplyWrites_Create *RepoApplyWrites_Create
+     RepoApplyWrites_Update *RepoApplyWrites_Update
+     RepoApplyWrites_Delete *RepoApplyWrites_Delete
+ }
+
+ func (t *RepoApplyWrites_Input_Writes_Elem) MarshalJSON() ([]byte, error) {
+     if t.RepoApplyWrites_Create != nil {
+         t.RepoApplyWrites_Create.LexiconTypeID = "com.atproto.repo.applyWrites#create"
+         return json.Marshal(t.RepoApplyWrites_Create)
+     }
+     if t.RepoApplyWrites_Update != nil {
+         t.RepoApplyWrites_Update.LexiconTypeID = "com.atproto.repo.applyWrites#update"
+         return json.Marshal(t.RepoApplyWrites_Update)
+     }
+     if t.RepoApplyWrites_Delete != nil {
+         t.RepoApplyWrites_Delete.LexiconTypeID = "com.atproto.repo.applyWrites#delete"
+         return json.Marshal(t.RepoApplyWrites_Delete)
+     }
+     return nil, fmt.Errorf("cannot marshal empty enum")
+ }
+ func (t *RepoApplyWrites_Input_Writes_Elem) UnmarshalJSON(b []byte) error {
+     typ, err := util.TypeExtract(b)
+     if err != nil {
+         return err
+     }
+
+     switch typ {
+     case "com.atproto.repo.applyWrites#create":
+         t.RepoApplyWrites_Create = new(RepoApplyWrites_Create)
+         return json.Unmarshal(b, t.RepoApplyWrites_Create)
+     case "com.atproto.repo.applyWrites#update":
+         t.RepoApplyWrites_Update = new(RepoApplyWrites_Update)
+         return json.Unmarshal(b, t.RepoApplyWrites_Update)
+     case "com.atproto.repo.applyWrites#delete":
+         t.RepoApplyWrites_Delete = new(RepoApplyWrites_Delete)
+         return json.Unmarshal(b, t.RepoApplyWrites_Delete)
+
+     default:
+         return fmt.Errorf("closed enums must have a matching value")
+     }
+ }
+
+ // RepoApplyWrites_Output is the output of a com.atproto.repo.applyWrites call.
+ type RepoApplyWrites_Output struct {
+     Commit *RepoDefs_CommitMeta `json:"commit,omitempty" cborgen:"commit,omitempty"`
+     Results []*RepoApplyWrites_Output_Results_Elem `json:"results,omitempty" cborgen:"results,omitempty"`
+ }
+
+ type RepoApplyWrites_Output_Results_Elem struct {
+     RepoApplyWrites_CreateResult *RepoApplyWrites_CreateResult
+     RepoApplyWrites_UpdateResult *RepoApplyWrites_UpdateResult
+     RepoApplyWrites_DeleteResult *RepoApplyWrites_DeleteResult
+ }
+
+ func (t *RepoApplyWrites_Output_Results_Elem) MarshalJSON() ([]byte, error) {
+     if t.RepoApplyWrites_CreateResult != nil {
+         t.RepoApplyWrites_CreateResult.LexiconTypeID = "com.atproto.repo.applyWrites#createResult"
+         return json.Marshal(t.RepoApplyWrites_CreateResult)
+     }
+     if t.RepoApplyWrites_UpdateResult != nil {
+         t.RepoApplyWrites_UpdateResult.LexiconTypeID = "com.atproto.repo.applyWrites#updateResult"
+         return json.Marshal(t.RepoApplyWrites_UpdateResult)
+     }
+     if t.RepoApplyWrites_DeleteResult != nil {
+         t.RepoApplyWrites_DeleteResult.LexiconTypeID = "com.atproto.repo.applyWrites#deleteResult"
+         return json.Marshal(t.RepoApplyWrites_DeleteResult)
+     }
+     return nil, fmt.Errorf("cannot marshal empty enum")
+ }
+ func (t *RepoApplyWrites_Output_Results_Elem) UnmarshalJSON(b []byte) error {
+     typ, err := util.TypeExtract(b)
+     if err != nil {
+         return err
+     }
+
+     switch typ {
+     case "com.atproto.repo.applyWrites#createResult":
+         t.RepoApplyWrites_CreateResult = new(RepoApplyWrites_CreateResult)
+         return json.Unmarshal(b, t.RepoApplyWrites_CreateResult)
+     case "com.atproto.repo.applyWrites#updateResult":
+         t.RepoApplyWrites_UpdateResult = new(RepoApplyWrites_UpdateResult)
+         return json.Unmarshal(b, t.RepoApplyWrites_UpdateResult)
+     case "com.atproto.repo.applyWrites#deleteResult":
+         t.RepoApplyWrites_DeleteResult = new(RepoApplyWrites_DeleteResult)
+         return json.Unmarshal(b, t.RepoApplyWrites_DeleteResult)
+
+     default:
+         return fmt.Errorf("closed enums must have a matching value")
+     }
+ }
+
+ // RepoApplyWrites_Update is a "update" in the com.atproto.repo.applyWrites schema.
+ //
+ // Operation which updates an existing record.
+ //
+ // RECORDTYPE: RepoApplyWrites_Update
+ type RepoApplyWrites_Update struct {
+     LexiconTypeID string `json:"$type,const=com.atproto.repo.applyWrites#update" cborgen:"$type,const=com.atproto.repo.applyWrites#update"`
+     Collection string `json:"collection" cborgen:"collection"`
+     Rkey string `json:"rkey" cborgen:"rkey"`
+     Value *json.RawMessage `json:"value" cborgen:"value"`
+ }
+
+ // RepoApplyWrites_UpdateResult is a "updateResult" in the com.atproto.repo.applyWrites schema.
+ //
+ // RECORDTYPE: RepoApplyWrites_UpdateResult
+ type RepoApplyWrites_UpdateResult struct {
+     LexiconTypeID string `json:"$type,const=com.atproto.repo.applyWrites#updateResult" cborgen:"$type,const=com.atproto.repo.applyWrites#updateResult"`
+     Cid string `json:"cid" cborgen:"cid"`
+     Uri string `json:"uri" cborgen:"uri"`
+     ValidationStatus *string `json:"validationStatus,omitempty" cborgen:"validationStatus,omitempty"`
+ }
+
+ // RepoApplyWrites calls the XRPC method "com.atproto.repo.applyWrites".
+ func RepoApplyWrites(ctx context.Context, c *xrpc.Client, input *RepoApplyWrites_Input) (*RepoApplyWrites_Output, error) {
+     var out RepoApplyWrites_Output
+     if err := c.Do(ctx, xrpc.Procedure, "application/json", "com.atproto.repo.applyWrites", nil, input, &out); err != nil {
+         return nil, err
+     }
+
+     return &out, nil
+ }
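
For orientation, a minimal usage sketch of the new agnostic applyWrites client (not part of this diff; the DID, collection, and record value are hypothetical examples, and the *xrpc.Client is assumed to already be authenticated):

package example

import (
    "context"
    "encoding/json"
    "fmt"

    "github.com/bluesky-social/indigo/api/agnostic"
    "github.com/bluesky-social/indigo/xrpc"
)

// applyExample batches a single "create" write through com.atproto.repo.applyWrites.
func applyExample(ctx context.Context, c *xrpc.Client) error {
    // the agnostic package keeps record bodies as raw JSON rather than typed structs
    rec := json.RawMessage(`{"$type": "com.example.demo.record", "text": "hello"}`)
    input := &agnostic.RepoApplyWrites_Input{
        Repo: "did:plc:example123", // hypothetical account DID
        Writes: []*agnostic.RepoApplyWrites_Input_Writes_Elem{
            {RepoApplyWrites_Create: &agnostic.RepoApplyWrites_Create{
                Collection: "com.example.demo.record",
                Value:      &rec,
            }},
        },
    }
    out, err := agnostic.RepoApplyWrites(ctx, c, input)
    if err != nil {
        return err
    }
    // MarshalJSON on the union element sets the $type automatically; results come back as a union too
    for _, res := range out.Results {
        if res.RepoApplyWrites_CreateResult != nil {
            fmt.Println("created:", res.RepoApplyWrites_CreateResult.Uri)
        }
    }
    return nil
}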
+2 api/atproto/servercreateSession.go
···

  // ServerCreateSession_Input is the input argument to a com.atproto.server.createSession call.
  type ServerCreateSession_Input struct {
+     // allowTakendown: When true, instead of throwing error for takendown accounts, a valid response with a narrow scoped token will be returned
+     AllowTakendown *bool `json:"allowTakendown,omitempty" cborgen:"allowTakendown,omitempty"`
      AuthFactorToken *string `json:"authFactorToken,omitempty" cborgen:"authFactorToken,omitempty"`
      // identifier: Handle or other identifier supported by the server for the authenticating user.
      Identifier string `json:"identifier" cborgen:"identifier"`
+30 api/atproto/tempaddReservedHandle.go
···
+ // Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT.
+
+ package atproto
+
+ // schema: com.atproto.temp.addReservedHandle
+
+ import (
+     "context"
+
+     "github.com/bluesky-social/indigo/xrpc"
+ )
+
+ // TempAddReservedHandle_Input is the input argument to a com.atproto.temp.addReservedHandle call.
+ type TempAddReservedHandle_Input struct {
+     Handle string `json:"handle" cborgen:"handle"`
+ }
+
+ // TempAddReservedHandle_Output is the output of a com.atproto.temp.addReservedHandle call.
+ type TempAddReservedHandle_Output struct {
+ }
+
+ // TempAddReservedHandle calls the XRPC method "com.atproto.temp.addReservedHandle".
+ func TempAddReservedHandle(ctx context.Context, c *xrpc.Client, input *TempAddReservedHandle_Input) (*TempAddReservedHandle_Output, error) {
+     var out TempAddReservedHandle_Output
+     if err := c.Do(ctx, xrpc.Procedure, "application/json", "com.atproto.temp.addReservedHandle", nil, input, &out); err != nil {
+         return nil, err
+     }
+
+     return &out, nil
+ }
+2 api/bsky/actorgetSuggestions.go
···
  type ActorGetSuggestions_Output struct {
      Actors []*ActorDefs_ProfileView `json:"actors" cborgen:"actors"`
      Cursor *string `json:"cursor,omitempty" cborgen:"cursor,omitempty"`
+     // recId: Snowflake for this recommendation, use when submitting recommendation events.
+     RecId *int64 `json:"recId,omitempty" cborgen:"recId,omitempty"`
  }

  // ActorGetSuggestions calls the XRPC method "app.bsky.actor.getSuggestions".
+3 -1 api/bsky/graphgetSuggestedFollowsByActor.go
···
  // GraphGetSuggestedFollowsByActor_Output is the output of a app.bsky.graph.getSuggestedFollowsByActor call.
  type GraphGetSuggestedFollowsByActor_Output struct {
      // isFallback: If true, response has fallen-back to generic results, and is not scoped using relativeToDid
-     IsFallback *bool `json:"isFallback,omitempty" cborgen:"isFallback,omitempty"`
+     IsFallback *bool `json:"isFallback,omitempty" cborgen:"isFallback,omitempty"`
+     // recId: Snowflake for this recommendation, use when submitting recommendation events.
+     RecId *int64 `json:"recId,omitempty" cborgen:"recId,omitempty"`
      Suggestions []*ActorDefs_ProfileView `json:"suggestions" cborgen:"suggestions"`
  }
+35 api/bsky/graphsearchStarterPacks.go
···
+ // Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT.
+
+ package bsky
+
+ // schema: app.bsky.graph.searchStarterPacks
+
+ import (
+     "context"
+
+     "github.com/bluesky-social/indigo/xrpc"
+ )
+
+ // GraphSearchStarterPacks_Output is the output of a app.bsky.graph.searchStarterPacks call.
+ type GraphSearchStarterPacks_Output struct {
+     Cursor *string `json:"cursor,omitempty" cborgen:"cursor,omitempty"`
+     StarterPacks []*GraphDefs_StarterPackViewBasic `json:"starterPacks" cborgen:"starterPacks"`
+ }
+
+ // GraphSearchStarterPacks calls the XRPC method "app.bsky.graph.searchStarterPacks".
+ //
+ // q: Search query string. Syntax, phrase, boolean, and faceting is unspecified, but Lucene query syntax is recommended.
+ func GraphSearchStarterPacks(ctx context.Context, c *xrpc.Client, cursor string, limit int64, q string) (*GraphSearchStarterPacks_Output, error) {
+     var out GraphSearchStarterPacks_Output
+
+     params := map[string]interface{}{
+         "cursor": cursor,
+         "limit": limit,
+         "q": q,
+     }
+     if err := c.Do(ctx, xrpc.Query, "", "app.bsky.graph.searchStarterPacks", params, nil, &out); err != nil {
+         return nil, err
+     }
+
+     return &out, nil
+ }
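
For orientation, a hypothetical call sketch for the new searchStarterPacks query wrapper (not part of this diff; the cursor, limit, and query values are examples):

package example

import (
    "context"
    "fmt"

    "github.com/bluesky-social/indigo/api/bsky"
    "github.com/bluesky-social/indigo/xrpc"
)

// searchPacks fetches one page of starter-pack search results.
func searchPacks(ctx context.Context, c *xrpc.Client) error {
    // empty cursor requests the first page; 25 is an arbitrary example limit
    out, err := bsky.GraphSearchStarterPacks(ctx, c, "", 25, "golang")
    if err != nil {
        return err
    }
    fmt.Printf("got %d starter packs\n", len(out.StarterPacks))
    if out.Cursor != nil {
        fmt.Println("next cursor:", *out.Cursor)
    }
    return nil
}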
+4 -1 api/bsky/notificationlistNotifications.go
···
  }

  // NotificationListNotifications calls the XRPC method "app.bsky.notification.listNotifications".
- func NotificationListNotifications(ctx context.Context, c *xrpc.Client, cursor string, limit int64, priority bool, seenAt string) (*NotificationListNotifications_Output, error) {
+ //
+ // reasons: Notification reasons to include in response.
+ func NotificationListNotifications(ctx context.Context, c *xrpc.Client, cursor string, limit int64, priority bool, reasons []string, seenAt string) (*NotificationListNotifications_Output, error) {
      var out NotificationListNotifications_Output

      params := map[string]interface{}{
          "cursor": cursor,
          "limit": limit,
          "priority": priority,
+         "reasons": reasons,
          "seenAt": seenAt,
      }
      if err := c.Do(ctx, xrpc.Query, "", "app.bsky.notification.listNotifications", params, nil, &out); err != nil {
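
For orientation, a sketch of calling the updated signature with the new reasons filter (not part of this diff; "mention" and "reply" are example reason values assumed to match the lexicon's notification reasons):

package example

import (
    "context"

    "github.com/bluesky-social/indigo/api/bsky"
    "github.com/bluesky-social/indigo/xrpc"
)

// listMentions asks the server to filter notifications to mentions and replies.
func listMentions(ctx context.Context, c *xrpc.Client) (*bsky.NotificationListNotifications_Output, error) {
    // empty cursor and seenAt, example limit of 50, priority filtering off
    return bsky.NotificationListNotifications(ctx, c, "", 50, false, []string{"mention", "reply"}, "")
}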
+13 api/bsky/unspecceddefs.go
···
  type UnspeccedDefs_SkeletonSearchPost struct {
      Uri string `json:"uri" cborgen:"uri"`
  }
+
+ // UnspeccedDefs_SkeletonSearchStarterPack is a "skeletonSearchStarterPack" in the app.bsky.unspecced.defs schema.
+ type UnspeccedDefs_SkeletonSearchStarterPack struct {
+     Uri string `json:"uri" cborgen:"uri"`
+ }
+
+ // UnspeccedDefs_TrendingTopic is a "trendingTopic" in the app.bsky.unspecced.defs schema.
+ type UnspeccedDefs_TrendingTopic struct {
+     Description *string `json:"description,omitempty" cborgen:"description,omitempty"`
+     DisplayName *string `json:"displayName,omitempty" cborgen:"displayName,omitempty"`
+     Link string `json:"link" cborgen:"link"`
+     Topic string `json:"topic" cborgen:"topic"`
+ }
+2 api/bsky/unspeccedgetSuggestionsSkeleton.go
···
  type UnspeccedGetSuggestionsSkeleton_Output struct {
      Actors []*UnspeccedDefs_SkeletonSearchActor `json:"actors" cborgen:"actors"`
      Cursor *string `json:"cursor,omitempty" cborgen:"cursor,omitempty"`
+     // recId: Snowflake for this recommendation, use when submitting recommendation events.
+     RecId *int64 `json:"recId,omitempty" cborgen:"recId,omitempty"`
      // relativeToDid: DID of the account these suggestions are relative to. If this is returned undefined, suggestions are based on the viewer.
      RelativeToDid *string `json:"relativeToDid,omitempty" cborgen:"relativeToDid,omitempty"`
  }
+34 api/bsky/unspeccedgetTrendingTopics.go
···
+ // Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT.
+
+ package bsky
+
+ // schema: app.bsky.unspecced.getTrendingTopics
+
+ import (
+     "context"
+
+     "github.com/bluesky-social/indigo/xrpc"
+ )
+
+ // UnspeccedGetTrendingTopics_Output is the output of a app.bsky.unspecced.getTrendingTopics call.
+ type UnspeccedGetTrendingTopics_Output struct {
+     Suggested []*UnspeccedDefs_TrendingTopic `json:"suggested" cborgen:"suggested"`
+     Topics []*UnspeccedDefs_TrendingTopic `json:"topics" cborgen:"topics"`
+ }
+
+ // UnspeccedGetTrendingTopics calls the XRPC method "app.bsky.unspecced.getTrendingTopics".
+ //
+ // viewer: DID of the account making the request (not included for public/unauthenticated queries). Used to boost followed accounts in ranking.
+ func UnspeccedGetTrendingTopics(ctx context.Context, c *xrpc.Client, limit int64, viewer string) (*UnspeccedGetTrendingTopics_Output, error) {
+     var out UnspeccedGetTrendingTopics_Output
+
+     params := map[string]interface{}{
+         "limit": limit,
+         "viewer": viewer,
+     }
+     if err := c.Do(ctx, xrpc.Query, "", "app.bsky.unspecced.getTrendingTopics", params, nil, &out); err != nil {
+         return nil, err
+     }
+
+     return &out, nil
+ }
+40 api/bsky/unspeccedsearchStarterPacksSkeleton.go
···
+ // Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT.
+
+ package bsky
+
+ // schema: app.bsky.unspecced.searchStarterPacksSkeleton
+
+ import (
+     "context"
+
+     "github.com/bluesky-social/indigo/xrpc"
+ )
+
+ // UnspeccedSearchStarterPacksSkeleton_Output is the output of a app.bsky.unspecced.searchStarterPacksSkeleton call.
+ type UnspeccedSearchStarterPacksSkeleton_Output struct {
+     Cursor *string `json:"cursor,omitempty" cborgen:"cursor,omitempty"`
+     // hitsTotal: Count of search hits. Optional, may be rounded/truncated, and may not be possible to paginate through all hits.
+     HitsTotal *int64 `json:"hitsTotal,omitempty" cborgen:"hitsTotal,omitempty"`
+     StarterPacks []*UnspeccedDefs_SkeletonSearchStarterPack `json:"starterPacks" cborgen:"starterPacks"`
+ }
+
+ // UnspeccedSearchStarterPacksSkeleton calls the XRPC method "app.bsky.unspecced.searchStarterPacksSkeleton".
+ //
+ // cursor: Optional pagination mechanism; may not necessarily allow scrolling through entire result set.
+ // q: Search query string; syntax, phrase, boolean, and faceting is unspecified, but Lucene query syntax is recommended.
+ // viewer: DID of the account making the request (not included for public/unauthenticated queries).
+ func UnspeccedSearchStarterPacksSkeleton(ctx context.Context, c *xrpc.Client, cursor string, limit int64, q string, viewer string) (*UnspeccedSearchStarterPacksSkeleton_Output, error) {
+     var out UnspeccedSearchStarterPacksSkeleton_Output
+
+     params := map[string]interface{}{
+         "cursor": cursor,
+         "limit": limit,
+         "q": q,
+         "viewer": viewer,
+     }
+     if err := c.Do(ctx, xrpc.Query, "", "app.bsky.unspecced.searchStarterPacksSkeleton", params, nil, &out); err != nil {
+         return nil, err
+     }
+
+     return &out, nil
+ }
+1 api/chat/convodefs.go
···
      LastMessage *ConvoDefs_ConvoView_LastMessage `json:"lastMessage,omitempty" cborgen:"lastMessage,omitempty"`
      Members []*ActorDefs_ProfileViewBasic `json:"members" cborgen:"members"`
      Muted bool `json:"muted" cborgen:"muted"`
+     Opened *bool `json:"opened,omitempty" cborgen:"opened,omitempty"`
      Rev string `json:"rev" cborgen:"rev"`
      UnreadCount int64 `json:"unreadCount" cborgen:"unreadCount"`
  }
+159 -6 api/ozone/moderationdefs.go
···
      "github.com/bluesky-social/indigo/lex/util"
  )

+ // ModerationDefs_AccountEvent is a "accountEvent" in the tools.ozone.moderation.defs schema.
+ //
+ // Logs account status related events on a repo subject. Normally captured by automod from the firehose and emitted to ozone for historical tracking.
+ //
+ // RECORDTYPE: ModerationDefs_AccountEvent
+ type ModerationDefs_AccountEvent struct {
+     LexiconTypeID string `json:"$type,const=tools.ozone.moderation.defs#accountEvent" cborgen:"$type,const=tools.ozone.moderation.defs#accountEvent"`
+     // active: Indicates that the account has a repository which can be fetched from the host that emitted this event.
+     Active bool `json:"active" cborgen:"active"`
+     Comment *string `json:"comment,omitempty" cborgen:"comment,omitempty"`
+     Status *string `json:"status,omitempty" cborgen:"status,omitempty"`
+     Timestamp string `json:"timestamp" cborgen:"timestamp"`
+ }
+
+ // ModerationDefs_AccountHosting is a "accountHosting" in the tools.ozone.moderation.defs schema.
+ //
+ // RECORDTYPE: ModerationDefs_AccountHosting
+ type ModerationDefs_AccountHosting struct {
+     LexiconTypeID string `json:"$type,const=tools.ozone.moderation.defs#accountHosting" cborgen:"$type,const=tools.ozone.moderation.defs#accountHosting"`
+     CreatedAt *string `json:"createdAt,omitempty" cborgen:"createdAt,omitempty"`
+     DeactivatedAt *string `json:"deactivatedAt,omitempty" cborgen:"deactivatedAt,omitempty"`
+     DeletedAt *string `json:"deletedAt,omitempty" cborgen:"deletedAt,omitempty"`
+     ReactivatedAt *string `json:"reactivatedAt,omitempty" cborgen:"reactivatedAt,omitempty"`
+     Status string `json:"status" cborgen:"status"`
+     UpdatedAt *string `json:"updatedAt,omitempty" cborgen:"updatedAt,omitempty"`
+ }
+
  // ModerationDefs_BlobView is a "blobView" in the tools.ozone.moderation.defs schema.
  type ModerationDefs_BlobView struct {
      Cid string `json:"cid" cborgen:"cid"`
···
      }
  }

+ // ModerationDefs_IdentityEvent is a "identityEvent" in the tools.ozone.moderation.defs schema.
+ //
+ // Logs identity related events on a repo subject. Normally captured by automod from the firehose and emitted to ozone for historical tracking.
+ //
+ // RECORDTYPE: ModerationDefs_IdentityEvent
+ type ModerationDefs_IdentityEvent struct {
+     LexiconTypeID string `json:"$type,const=tools.ozone.moderation.defs#identityEvent" cborgen:"$type,const=tools.ozone.moderation.defs#identityEvent"`
+     Comment *string `json:"comment,omitempty" cborgen:"comment,omitempty"`
+     Handle *string `json:"handle,omitempty" cborgen:"handle,omitempty"`
+     PdsHost *string `json:"pdsHost,omitempty" cborgen:"pdsHost,omitempty"`
+     Timestamp string `json:"timestamp" cborgen:"timestamp"`
+     Tombstone *bool `json:"tombstone,omitempty" cborgen:"tombstone,omitempty"`
+ }
+
  // ModerationDefs_ImageDetails is a "imageDetails" in the tools.ozone.moderation.defs schema.
  //
  // RECORDTYPE: ModerationDefs_ImageDetails
···
  //
  // RECORDTYPE: ModerationDefs_ModEventAcknowledge
  type ModerationDefs_ModEventAcknowledge struct {
-     LexiconTypeID string `json:"$type,const=tools.ozone.moderation.defs#modEventAcknowledge" cborgen:"$type,const=tools.ozone.moderation.defs#modEventAcknowledge"`
-     Comment *string `json:"comment,omitempty" cborgen:"comment,omitempty"`
+     LexiconTypeID string `json:"$type,const=tools.ozone.moderation.defs#modEventAcknowledge" cborgen:"$type,const=tools.ozone.moderation.defs#modEventAcknowledge"`
+     // acknowledgeAccountSubjects: If true, all other reports on content authored by this account will be resolved (acknowledged).
+     AcknowledgeAccountSubjects *bool `json:"acknowledgeAccountSubjects,omitempty" cborgen:"acknowledgeAccountSubjects,omitempty"`
+     Comment *string `json:"comment,omitempty" cborgen:"comment,omitempty"`
  }

  // ModerationDefs_ModEventComment is a "modEventComment" in the tools.ozone.moderation.defs schema.
···
  type ModerationDefs_ModEventMuteReporter struct {
      LexiconTypeID string `json:"$type,const=tools.ozone.moderation.defs#modEventMuteReporter" cborgen:"$type,const=tools.ozone.moderation.defs#modEventMuteReporter"`
      Comment *string `json:"comment,omitempty" cborgen:"comment,omitempty"`
-     // durationInHours: Indicates how long the account should remain muted.
-     DurationInHours int64 `json:"durationInHours" cborgen:"durationInHours"`
+     // durationInHours: Indicates how long the account should remain muted. Falsy value here means a permanent mute.
+     DurationInHours *int64 `json:"durationInHours,omitempty" cborgen:"durationInHours,omitempty"`
  }

  // ModerationDefs_ModEventReport is a "modEventReport" in the tools.ozone.moderation.defs schema.
···
      Comment *string `json:"comment,omitempty" cborgen:"comment,omitempty"`
      // durationInHours: Indicates how long the takedown should be in effect before automatically expiring.
      DurationInHours *int64 `json:"durationInHours,omitempty" cborgen:"durationInHours,omitempty"`
+     // policies: Names/Keywords of the policies that drove the decision.
+     Policies []string `json:"policies,omitempty" cborgen:"policies,omitempty"`
  }

  // ModerationDefs_ModEventUnmute is a "modEventUnmute" in the tools.ozone.moderation.defs schema.
···
      ModerationDefs_ModEventResolveAppeal *ModerationDefs_ModEventResolveAppeal
      ModerationDefs_ModEventDivert *ModerationDefs_ModEventDivert
      ModerationDefs_ModEventTag *ModerationDefs_ModEventTag
+     ModerationDefs_AccountEvent *ModerationDefs_AccountEvent
+     ModerationDefs_IdentityEvent *ModerationDefs_IdentityEvent
+     ModerationDefs_RecordEvent *ModerationDefs_RecordEvent
  }

  func (t *ModerationDefs_ModEventViewDetail_Event) MarshalJSON() ([]byte, error) {
···
          t.ModerationDefs_ModEventTag.LexiconTypeID = "tools.ozone.moderation.defs#modEventTag"
          return json.Marshal(t.ModerationDefs_ModEventTag)
      }
+     if t.ModerationDefs_AccountEvent != nil {
+         t.ModerationDefs_AccountEvent.LexiconTypeID = "tools.ozone.moderation.defs#accountEvent"
+         return json.Marshal(t.ModerationDefs_AccountEvent)
+     }
+     if t.ModerationDefs_IdentityEvent != nil {
+         t.ModerationDefs_IdentityEvent.LexiconTypeID = "tools.ozone.moderation.defs#identityEvent"
+         return json.Marshal(t.ModerationDefs_IdentityEvent)
+     }
+     if t.ModerationDefs_RecordEvent != nil {
+         t.ModerationDefs_RecordEvent.LexiconTypeID = "tools.ozone.moderation.defs#recordEvent"
+         return json.Marshal(t.ModerationDefs_RecordEvent)
+     }
      return nil, fmt.Errorf("cannot marshal empty enum")
  }
  func (t *ModerationDefs_ModEventViewDetail_Event) UnmarshalJSON(b []byte) error {
···
      case "tools.ozone.moderation.defs#modEventTag":
          t.ModerationDefs_ModEventTag = new(ModerationDefs_ModEventTag)
          return json.Unmarshal(b, t.ModerationDefs_ModEventTag)
+     case "tools.ozone.moderation.defs#accountEvent":
+         t.ModerationDefs_AccountEvent = new(ModerationDefs_AccountEvent)
+         return json.Unmarshal(b, t.ModerationDefs_AccountEvent)
+     case "tools.ozone.moderation.defs#identityEvent":
+         t.ModerationDefs_IdentityEvent = new(ModerationDefs_IdentityEvent)
+         return json.Unmarshal(b, t.ModerationDefs_IdentityEvent)
+     case "tools.ozone.moderation.defs#recordEvent":
+         t.ModerationDefs_RecordEvent = new(ModerationDefs_RecordEvent)
+         return json.Unmarshal(b, t.ModerationDefs_RecordEvent)

      default:
          return nil
···
      ModerationDefs_ModEventResolveAppeal *ModerationDefs_ModEventResolveAppeal
      ModerationDefs_ModEventDivert *ModerationDefs_ModEventDivert
      ModerationDefs_ModEventTag *ModerationDefs_ModEventTag
+     ModerationDefs_AccountEvent *ModerationDefs_AccountEvent
+     ModerationDefs_IdentityEvent *ModerationDefs_IdentityEvent
+     ModerationDefs_RecordEvent *ModerationDefs_RecordEvent
  }

  func (t *ModerationDefs_ModEventView_Event) MarshalJSON() ([]byte, error) {
···
          t.ModerationDefs_ModEventTag.LexiconTypeID = "tools.ozone.moderation.defs#modEventTag"
          return json.Marshal(t.ModerationDefs_ModEventTag)
      }
+     if t.ModerationDefs_AccountEvent != nil {
+         t.ModerationDefs_AccountEvent.LexiconTypeID = "tools.ozone.moderation.defs#accountEvent"
+         return json.Marshal(t.ModerationDefs_AccountEvent)
+     }
+     if t.ModerationDefs_IdentityEvent != nil {
+         t.ModerationDefs_IdentityEvent.LexiconTypeID = "tools.ozone.moderation.defs#identityEvent"
+         return json.Marshal(t.ModerationDefs_IdentityEvent)
+     }
+     if t.ModerationDefs_RecordEvent != nil {
+         t.ModerationDefs_RecordEvent.LexiconTypeID = "tools.ozone.moderation.defs#recordEvent"
+         return json.Marshal(t.ModerationDefs_RecordEvent)
+     }
      return nil, fmt.Errorf("cannot marshal empty enum")
  }
  func (t *ModerationDefs_ModEventView_Event) UnmarshalJSON(b []byte) error {
···
      case "tools.ozone.moderation.defs#modEventTag":
          t.ModerationDefs_ModEventTag = new(ModerationDefs_ModEventTag)
          return json.Unmarshal(b, t.ModerationDefs_ModEventTag)
+     case "tools.ozone.moderation.defs#accountEvent":
+         t.ModerationDefs_AccountEvent = new(ModerationDefs_AccountEvent)
+         return json.Unmarshal(b, t.ModerationDefs_AccountEvent)
+     case "tools.ozone.moderation.defs#identityEvent":
+         t.ModerationDefs_IdentityEvent = new(ModerationDefs_IdentityEvent)
+         return json.Unmarshal(b, t.ModerationDefs_IdentityEvent)
+     case "tools.ozone.moderation.defs#recordEvent":
+         t.ModerationDefs_RecordEvent = new(ModerationDefs_RecordEvent)
+         return json.Unmarshal(b, t.ModerationDefs_RecordEvent)

      default:
          return nil
···
      SubjectStatus *ModerationDefs_SubjectStatusView `json:"subjectStatus,omitempty" cborgen:"subjectStatus,omitempty"`
  }

+ // ModerationDefs_RecordEvent is a "recordEvent" in the tools.ozone.moderation.defs schema.
+ //
+ // Logs lifecycle event on a record subject. Normally captured by automod from the firehose and emitted to ozone for historical tracking.
+ //
+ // RECORDTYPE: ModerationDefs_RecordEvent
+ type ModerationDefs_RecordEvent struct {
+     LexiconTypeID string `json:"$type,const=tools.ozone.moderation.defs#recordEvent" cborgen:"$type,const=tools.ozone.moderation.defs#recordEvent"`
+     Cid *string `json:"cid,omitempty" cborgen:"cid,omitempty"`
+     Comment *string `json:"comment,omitempty" cborgen:"comment,omitempty"`
+     Op string `json:"op" cborgen:"op"`
+     Timestamp string `json:"timestamp" cborgen:"timestamp"`
+ }
+
+ // ModerationDefs_RecordHosting is a "recordHosting" in the tools.ozone.moderation.defs schema.
+ //
+ // RECORDTYPE: ModerationDefs_RecordHosting
+ type ModerationDefs_RecordHosting struct {
+     LexiconTypeID string `json:"$type,const=tools.ozone.moderation.defs#recordHosting" cborgen:"$type,const=tools.ozone.moderation.defs#recordHosting"`
+     CreatedAt *string `json:"createdAt,omitempty" cborgen:"createdAt,omitempty"`
+     DeletedAt *string `json:"deletedAt,omitempty" cborgen:"deletedAt,omitempty"`
+     Status string `json:"status" cborgen:"status"`
+     UpdatedAt *string `json:"updatedAt,omitempty" cborgen:"updatedAt,omitempty"`
+ }
+
  // ModerationDefs_RecordView is a "recordView" in the tools.ozone.moderation.defs schema.
  //
  // RECORDTYPE: ModerationDefs_RecordView
···
      // comment: Sticky comment on the subject.
      Comment *string `json:"comment,omitempty" cborgen:"comment,omitempty"`
      // createdAt: Timestamp referencing the first moderation status impacting event was emitted on the subject
-     CreatedAt string `json:"createdAt" cborgen:"createdAt"`
-     Id int64 `json:"id" cborgen:"id"`
+     CreatedAt string `json:"createdAt" cborgen:"createdAt"`
+     Hosting *ModerationDefs_SubjectStatusView_Hosting `json:"hosting,omitempty" cborgen:"hosting,omitempty"`
+     Id int64 `json:"id" cborgen:"id"`
      // lastAppealedAt: Timestamp referencing when the author of the subject appealed a moderation action
      LastAppealedAt *string `json:"lastAppealedAt,omitempty" cborgen:"lastAppealedAt,omitempty"`
      LastReportedAt *string `json:"lastReportedAt,omitempty" cborgen:"lastReportedAt,omitempty"`
···
      Takendown *bool `json:"takendown,omitempty" cborgen:"takendown,omitempty"`
      // updatedAt: Timestamp referencing when the last update was made to the moderation status of the subject
      UpdatedAt string `json:"updatedAt" cborgen:"updatedAt"`
+ }
+
+ type ModerationDefs_SubjectStatusView_Hosting struct {
+     ModerationDefs_AccountHosting *ModerationDefs_AccountHosting
+     ModerationDefs_RecordHosting *ModerationDefs_RecordHosting
+ }
+
+ func (t *ModerationDefs_SubjectStatusView_Hosting) MarshalJSON() ([]byte, error) {
+     if t.ModerationDefs_AccountHosting != nil {
+         t.ModerationDefs_AccountHosting.LexiconTypeID = "tools.ozone.moderation.defs#accountHosting"
+         return json.Marshal(t.ModerationDefs_AccountHosting)
+     }
+     if t.ModerationDefs_RecordHosting != nil {
+         t.ModerationDefs_RecordHosting.LexiconTypeID = "tools.ozone.moderation.defs#recordHosting"
+         return json.Marshal(t.ModerationDefs_RecordHosting)
+     }
+     return nil, fmt.Errorf("cannot marshal empty enum")
+ }
+ func (t *ModerationDefs_SubjectStatusView_Hosting) UnmarshalJSON(b []byte) error {
+     typ, err := util.TypeExtract(b)
+     if err != nil {
+         return err
+     }
+
+     switch typ {
+     case "tools.ozone.moderation.defs#accountHosting":
+         t.ModerationDefs_AccountHosting = new(ModerationDefs_AccountHosting)
+         return json.Unmarshal(b, t.ModerationDefs_AccountHosting)
+     case "tools.ozone.moderation.defs#recordHosting":
+         t.ModerationDefs_RecordHosting = new(ModerationDefs_RecordHosting)
+         return json.Unmarshal(b, t.ModerationDefs_RecordHosting)
+
+     default:
+         return nil
+     }
  }

  type ModerationDefs_SubjectStatusView_Subject struct {
+24 api/ozone/moderationemitEvent.go
···
      ModerationDefs_ModEventResolveAppeal *ModerationDefs_ModEventResolveAppeal
      ModerationDefs_ModEventEmail *ModerationDefs_ModEventEmail
      ModerationDefs_ModEventTag *ModerationDefs_ModEventTag
+     ModerationDefs_AccountEvent *ModerationDefs_AccountEvent
+     ModerationDefs_IdentityEvent *ModerationDefs_IdentityEvent
+     ModerationDefs_RecordEvent *ModerationDefs_RecordEvent
  }

  func (t *ModerationEmitEvent_Input_Event) MarshalJSON() ([]byte, error) {
···
          t.ModerationDefs_ModEventTag.LexiconTypeID = "tools.ozone.moderation.defs#modEventTag"
          return json.Marshal(t.ModerationDefs_ModEventTag)
      }
+     if t.ModerationDefs_AccountEvent != nil {
+         t.ModerationDefs_AccountEvent.LexiconTypeID = "tools.ozone.moderation.defs#accountEvent"
+         return json.Marshal(t.ModerationDefs_AccountEvent)
+     }
+     if t.ModerationDefs_IdentityEvent != nil {
+         t.ModerationDefs_IdentityEvent.LexiconTypeID = "tools.ozone.moderation.defs#identityEvent"
+         return json.Marshal(t.ModerationDefs_IdentityEvent)
+     }
+     if t.ModerationDefs_RecordEvent != nil {
+         t.ModerationDefs_RecordEvent.LexiconTypeID = "tools.ozone.moderation.defs#recordEvent"
+         return json.Marshal(t.ModerationDefs_RecordEvent)
+     }
      return nil, fmt.Errorf("cannot marshal empty enum")
  }
  func (t *ModerationEmitEvent_Input_Event) UnmarshalJSON(b []byte) error {
···
      case "tools.ozone.moderation.defs#modEventTag":
          t.ModerationDefs_ModEventTag = new(ModerationDefs_ModEventTag)
          return json.Unmarshal(b, t.ModerationDefs_ModEventTag)
+     case "tools.ozone.moderation.defs#accountEvent":
+         t.ModerationDefs_AccountEvent = new(ModerationDefs_AccountEvent)
+         return json.Unmarshal(b, t.ModerationDefs_AccountEvent)
+     case "tools.ozone.moderation.defs#identityEvent":
+         t.ModerationDefs_IdentityEvent = new(ModerationDefs_IdentityEvent)
+         return json.Unmarshal(b, t.ModerationDefs_IdentityEvent)
+     case "tools.ozone.moderation.defs#recordEvent":
+         t.ModerationDefs_RecordEvent = new(ModerationDefs_RecordEvent)
+         return json.Unmarshal(b, t.ModerationDefs_RecordEvent)

      default:
          return nil
+3 -2 api/ozone/moderationqueryEvents.go
···
  // addedLabels: If specified, only events where all of these labels were added are returned
  // addedTags: If specified, only events where all of these tags were added are returned
  // collections: If specified, only events where the subject belongs to the given collections will be returned. When subjectType is set to 'account', this will be ignored.
- // comment: If specified, only events with comments containing the keyword are returned
+ // comment: If specified, only events with comments containing the keyword are returned. Apply || separator to use multiple keywords and match using OR condition.
  // createdAfter: Retrieve events created after a given timestamp
  // createdBefore: Retrieve events created before a given timestamp
  // hasComment: If true, only events with comments are returned
···
  // sortDirection: Sort direction for the events. Defaults to descending order of created at timestamp.
  // subjectType: If specified, only events where the subject is of the given type (account or record) will be returned. When this is set to 'account' the 'collections' parameter will be ignored. When includeAllUserRecords or subject is set, this will be ignored.
  // types: The types of events (fully qualified string in the format of tools.ozone.moderation.defs#modEvent<name>) to filter by. If not specified, all events are returned.
- func ModerationQueryEvents(ctx context.Context, c *xrpc.Client, addedLabels []string, addedTags []string, collections []string, comment string, createdAfter string, createdBefore string, createdBy string, cursor string, hasComment bool, includeAllUserRecords bool, limit int64, removedLabels []string, removedTags []string, reportTypes []string, sortDirection string, subject string, subjectType string, types []string) (*ModerationQueryEvents_Output, error) {
+ func ModerationQueryEvents(ctx context.Context, c *xrpc.Client, addedLabels []string, addedTags []string, collections []string, comment string, createdAfter string, createdBefore string, createdBy string, cursor string, hasComment bool, includeAllUserRecords bool, limit int64, policies []string, removedLabels []string, removedTags []string, reportTypes []string, sortDirection string, subject string, subjectType string, types []string) (*ModerationQueryEvents_Output, error) {
      var out ModerationQueryEvents_Output

      params := map[string]interface{}{
···
          "hasComment": hasComment,
          "includeAllUserRecords": includeAllUserRecords,
          "limit": limit,
+         "policies": policies,
          "removedLabels": removedLabels,
          "removedTags": removedTags,
          "reportTypes": reportTypes,
+17 -1 api/ozone/moderationqueryStatuses.go
···
  // appealed: Get subjects in unresolved appealed status
  // collections: If specified, subjects belonging to the given collections will be returned. When subjectType is set to 'account', this will be ignored.
  // comment: Search subjects by keyword from comments
+ // hostingDeletedAfter: Search subjects where the associated record/account was deleted after a given timestamp
+ // hostingDeletedBefore: Search subjects where the associated record/account was deleted before a given timestamp
+ // hostingStatuses: Search subjects by the status of the associated record/account
+ // hostingUpdatedAfter: Search subjects where the associated record/account was updated after a given timestamp
+ // hostingUpdatedBefore: Search subjects where the associated record/account was updated before a given timestamp
  // includeAllUserRecords: All subjects, or subjects from given 'collections' param, belonging to the account specified in the 'subject' param will be returned.
  // includeMuted: By default, we don't include muted subjects in the results. Set this to true to include them.
  // lastReviewedBy: Get all subject statuses that were reviewed by a specific moderator
  // onlyMuted: When set to true, only muted subjects and reporters will be returned.
+ // queueCount: Number of queues being used by moderators. Subjects will be split among all queues.
+ // queueIndex: Index of the queue to fetch subjects from. Works only when queueCount value is specified.
+ // queueSeed: A seeder to shuffle/balance the queue items.
  // reportedAfter: Search subjects reported after a given timestamp
  // reportedBefore: Search subjects reported before a given timestamp
  // reviewState: Specify when fetching subjects in a certain state
···
  // subject: The subject to get the status for.
  // subjectType: If specified, subjects of the given type (account or record) will be returned. When this is set to 'account' the 'collections' parameter will be ignored. When includeAllUserRecords or subject is set, this will be ignored.
  // takendown: Get subjects that were taken down
- func ModerationQueryStatuses(ctx context.Context, c *xrpc.Client, appealed bool, collections []string, comment string, cursor string, excludeTags []string, ignoreSubjects []string, includeAllUserRecords bool, includeMuted bool, lastReviewedBy string, limit int64, onlyMuted bool, reportedAfter string, reportedBefore string, reviewState string, reviewedAfter string, reviewedBefore string, sortDirection string, sortField string, subject string, subjectType string, tags []string, takendown bool) (*ModerationQueryStatuses_Output, error) {
+ func ModerationQueryStatuses(ctx context.Context, c *xrpc.Client, appealed bool, collections []string, comment string, cursor string, excludeTags []string, hostingDeletedAfter string, hostingDeletedBefore string, hostingStatuses []string, hostingUpdatedAfter string, hostingUpdatedBefore string, ignoreSubjects []string, includeAllUserRecords bool, includeMuted bool, lastReviewedBy string, limit int64, onlyMuted bool, queueCount int64, queueIndex int64, queueSeed string, reportedAfter string, reportedBefore string, reviewState string, reviewedAfter string, reviewedBefore string, sortDirection string, sortField string, subject string, subjectType string, tags []string, takendown bool) (*ModerationQueryStatuses_Output, error) {
      var out ModerationQueryStatuses_Output

      params := map[string]interface{}{
···
          "comment": comment,
          "cursor": cursor,
          "excludeTags": excludeTags,
+         "hostingDeletedAfter": hostingDeletedAfter,
+         "hostingDeletedBefore": hostingDeletedBefore,
+         "hostingStatuses": hostingStatuses,
+         "hostingUpdatedAfter": hostingUpdatedAfter,
+         "hostingUpdatedBefore": hostingUpdatedBefore,
          "ignoreSubjects": ignoreSubjects,
          "includeAllUserRecords": includeAllUserRecords,
          "includeMuted": includeMuted,
          "lastReviewedBy": lastReviewedBy,
          "limit": limit,
          "onlyMuted": onlyMuted,
+         "queueCount": queueCount,
+         "queueIndex": queueIndex,
+         "queueSeed": queueSeed,
          "reportedAfter": reportedAfter,
          "reportedBefore": reportedBefore,
          "reviewState": reviewState,
+23 api/ozone/settingdefs.go
···
+ // Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT.
+
+ package ozone
+
+ // schema: tools.ozone.setting.defs
+
+ import (
+     "github.com/bluesky-social/indigo/lex/util"
+ )
+
+ // SettingDefs_Option is a "option" in the tools.ozone.setting.defs schema.
+ type SettingDefs_Option struct {
+     CreatedAt *string `json:"createdAt,omitempty" cborgen:"createdAt,omitempty"`
+     CreatedBy string `json:"createdBy" cborgen:"createdBy"`
+     Description *string `json:"description,omitempty" cborgen:"description,omitempty"`
+     Did string `json:"did" cborgen:"did"`
+     Key string `json:"key" cborgen:"key"`
+     LastUpdatedBy string `json:"lastUpdatedBy" cborgen:"lastUpdatedBy"`
+     ManagerRole *string `json:"managerRole,omitempty" cborgen:"managerRole,omitempty"`
+     Scope string `json:"scope" cborgen:"scope"`
+     UpdatedAt *string `json:"updatedAt,omitempty" cborgen:"updatedAt,omitempty"`
+     Value *util.LexiconTypeDecoder `json:"value" cborgen:"value"`
+ }
+38 api/ozone/settinglistOptions.go
···
+ // Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT.
+
+ package ozone
+
+ // schema: tools.ozone.setting.listOptions
+
+ import (
+     "context"
+
+     "github.com/bluesky-social/indigo/xrpc"
+ )
+
+ // SettingListOptions_Output is the output of a tools.ozone.setting.listOptions call.
+ type SettingListOptions_Output struct {
+     Cursor *string `json:"cursor,omitempty" cborgen:"cursor,omitempty"`
+     Options []*SettingDefs_Option `json:"options" cborgen:"options"`
+ }
+
+ // SettingListOptions calls the XRPC method "tools.ozone.setting.listOptions".
+ //
+ // keys: Filter for only the specified keys. Ignored if prefix is provided
+ // prefix: Filter keys by prefix
+ func SettingListOptions(ctx context.Context, c *xrpc.Client, cursor string, keys []string, limit int64, prefix string, scope string) (*SettingListOptions_Output, error) {
+     var out SettingListOptions_Output
+
+     params := map[string]interface{}{
+         "cursor": cursor,
+         "keys": keys,
+         "limit": limit,
+         "prefix": prefix,
+         "scope": scope,
+     }
+     if err := c.Do(ctx, xrpc.Query, "", "tools.ozone.setting.listOptions", params, nil, &out); err != nil {
+         return nil, err
+     }
+
+     return &out, nil
+ }
+31 api/ozone/settingremoveOptions.go
···
+ // Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT.
+
+ package ozone
+
+ // schema: tools.ozone.setting.removeOptions
+
+ import (
+     "context"
+
+     "github.com/bluesky-social/indigo/xrpc"
+ )
+
+ // SettingRemoveOptions_Input is the input argument to a tools.ozone.setting.removeOptions call.
+ type SettingRemoveOptions_Input struct {
+     Keys []string `json:"keys" cborgen:"keys"`
+     Scope string `json:"scope" cborgen:"scope"`
+ }
+
+ // SettingRemoveOptions_Output is the output of a tools.ozone.setting.removeOptions call.
+ type SettingRemoveOptions_Output struct {
+ }
+
+ // SettingRemoveOptions calls the XRPC method "tools.ozone.setting.removeOptions".
+ func SettingRemoveOptions(ctx context.Context, c *xrpc.Client, input *SettingRemoveOptions_Input) (*SettingRemoveOptions_Output, error) {
+     var out SettingRemoveOptions_Output
+     if err := c.Do(ctx, xrpc.Procedure, "application/json", "tools.ozone.setting.removeOptions", nil, input, &out); err != nil {
+         return nil, err
+     }
+
+     return &out, nil
+ }
+36 api/ozone/settingupsertOption.go
···
+ // Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT.
+
+ package ozone
+
+ // schema: tools.ozone.setting.upsertOption
+
+ import (
+     "context"
+
+     "github.com/bluesky-social/indigo/lex/util"
+     "github.com/bluesky-social/indigo/xrpc"
+ )
+
+ // SettingUpsertOption_Input is the input argument to a tools.ozone.setting.upsertOption call.
+ type SettingUpsertOption_Input struct {
+     Description *string `json:"description,omitempty" cborgen:"description,omitempty"`
+     Key string `json:"key" cborgen:"key"`
+     ManagerRole *string `json:"managerRole,omitempty" cborgen:"managerRole,omitempty"`
+     Scope string `json:"scope" cborgen:"scope"`
+     Value *util.LexiconTypeDecoder `json:"value" cborgen:"value"`
+ }
+
+ // SettingUpsertOption_Output is the output of a tools.ozone.setting.upsertOption call.
+ type SettingUpsertOption_Output struct {
+     Option *SettingDefs_Option `json:"option" cborgen:"option"`
+ }
+
+ // SettingUpsertOption calls the XRPC method "tools.ozone.setting.upsertOption".
+ func SettingUpsertOption(ctx context.Context, c *xrpc.Client, input *SettingUpsertOption_Input) (*SettingUpsertOption_Output, error) {
+     var out SettingUpsertOption_Output
+     if err := c.Do(ctx, xrpc.Procedure, "application/json", "tools.ozone.setting.upsertOption", nil, input, &out); err != nil {
+         return nil, err
+     }
+
+     return &out, nil
+ }
+2 -8 atproto/data/data.go
···
          out = append(out, v)
      case []any:
          for _, el := range v {
-             down := extractBlobsAtom(el)
-             for _, d := range down {
-                 out = append(out, d)
-             }
+             out = append(out, extractBlobsAtom(el)...)
          }
      case map[string]any:
          for _, val := range v {
-             down := extractBlobsAtom(val)
-             for _, d := range down {
-                 out = append(out, d)
-             }
+             out = append(out, extractBlobsAtom(val)...)
          }
      default:
      }
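
The rewrite above leans on Go's variadic append to splice a whole slice in one call instead of copying element by element; a small self-contained illustration of the same idiom (hypothetical helper, not from the diff):

package example

// flatten concatenates nested slices the same way the refactored
// extractBlobsAtom call sites do: append with the ... spread copies
// every element of the inner slice in a single call.
func flatten(groups [][]int) []int {
    var out []int
    for _, g := range groups {
        out = append(out, g...)
    }
    return out
}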
+4 -3 atproto/identity/base_directory.go
···
      ident := ParseIdentity(doc)
      declared, err := ident.DeclaredHandle()
      if err != nil {
-         return nil, err
+         return nil, fmt.Errorf("could not verify handle/DID match: %w", err)
      }
      if declared != h {
-         return nil, ErrHandleMismatch
+         return nil, fmt.Errorf("%w: %s != %s", ErrHandleMismatch, declared, h)
      }
      ident.Handle = declared

···
      if errors.Is(err, ErrHandleNotDeclared) {
          ident.Handle = syntax.HandleInvalid
      } else if err != nil {
-         return nil, err
+         return nil, fmt.Errorf("could not parse handle from DID document: %w", err)
      } else {
          // if a handle was declared, resolve it
          resolvedDID, err := d.ResolveHandle(ctx, declared)
···
  }

  func (d *BaseDirectory) Purge(ctx context.Context, a syntax.AtIdentifier) error {
+     // BaseDirectory itself does not implement caching
      return nil
  }
+3 -3 atproto/identity/cache_directory.go
···

  func (d *CacheDirectory) ResolveHandle(ctx context.Context, h syntax.Handle) (syntax.DID, error) {
      if h.IsInvalidHandle() {
-         return "", fmt.Errorf("invalid handle")
+         return "", fmt.Errorf("can not resolve handle: %w", ErrInvalidHandle)
      }
      entry, ok := d.handleCache.Get(h)
      if ok && !d.IsHandleStale(&entry) {
···

      declared, err := ident.DeclaredHandle()
      if err != nil {
-         return nil, hit, err
+         return nil, hit, fmt.Errorf("could not verify handle/DID mapping: %w", err)
      }
      if declared != h {
-         return nil, hit, ErrHandleMismatch
+         return nil, hit, fmt.Errorf("%w: %s != %s", ErrHandleMismatch, declared, h)
      }
      return ident, hit, nil
  }
-20 atproto/identity/did.go
···
      "github.com/bluesky-social/indigo/atproto/syntax"
  )

- type DIDDocument struct {
-     DID syntax.DID `json:"id"`
-     AlsoKnownAs []string `json:"alsoKnownAs,omitempty"`
-     VerificationMethod []DocVerificationMethod `json:"verificationMethod,omitempty"`
-     Service []DocService `json:"service,omitempty"`
- }
-
- type DocVerificationMethod struct {
-     ID string `json:"id"`
-     Type string `json:"type"`
-     Controller string `json:"controller"`
-     PublicKeyMultibase string `json:"publicKeyMultibase"`
- }
-
- type DocService struct {
-     ID string `json:"id"`
-     Type string `json:"type"`
-     ServiceEndpoint string `json:"serviceEndpoint"`
- }
-
  // WARNING: this does *not* bi-directionally verify account metadata; it only implements direct DID-to-DID-document lookup for the supported DID methods, and parses the resulting DID Doc into an Identity struct
  func (d *BaseDirectory) ResolveDID(ctx context.Context, did syntax.DID) (*DIDDocument, error) {
      start := time.Now()
atproto/identity/did_test.go → atproto/identity/diddoc_text.go
+25 atproto/identity/diddoc.go
···
+ package identity
+
+ import (
+     "github.com/bluesky-social/indigo/atproto/syntax"
+ )
+
+ type DIDDocument struct {
+     DID syntax.DID `json:"id"`
+     AlsoKnownAs []string `json:"alsoKnownAs,omitempty"`
+     VerificationMethod []DocVerificationMethod `json:"verificationMethod,omitempty"`
+     Service []DocService `json:"service,omitempty"`
+ }
+
+ type DocVerificationMethod struct {
+     ID string `json:"id"`
+     Type string `json:"type"`
+     Controller string `json:"controller"`
+     PublicKeyMultibase string `json:"publicKeyMultibase"`
+ }
+
+ type DocService struct {
+     ID string `json:"id"`
+     Type string `json:"type"`
+     ServiceEndpoint string `json:"serviceEndpoint"`
+ }
+84
atproto/identity/directory.go
+84
atproto/identity/directory.go
···
1
+
package identity
2
+
3
+
import (
4
+
"context"
5
+
"errors"
6
+
"net"
7
+
"net/http"
8
+
"time"
9
+
10
+
"github.com/bluesky-social/indigo/atproto/syntax"
11
+
)
12
+
13
+
// API for doing account lookups by DID or handle, with bi-directional verification handled automatically. Almost all atproto services and clients should use an implementation of this interface instead of resolving handles or DIDs separately
14
+
//
15
+
// Handles which fail to resolve, or don't match DID alsoKnownAs, are an error. DIDs which resolve but the handle does not resolve back to the DID return an Identity where the Handle is the special `handle.invalid` value.
16
+
//
17
+
// Some example implementations of this interface could be:
18
+
// - basic direct resolution on every call
19
+
// - local in-memory caching layer to reduce network hits
20
+
// - API client, which just makes requests to PDS (or other remote service)
21
+
// - client for shared network cache (eg, Redis)
22
+
type Directory interface {
23
+
LookupHandle(ctx context.Context, h syntax.Handle) (*Identity, error)
24
+
LookupDID(ctx context.Context, d syntax.DID) (*Identity, error)
25
+
Lookup(ctx context.Context, i syntax.AtIdentifier) (*Identity, error)
26
+
27
+
// Flushes any cache of the indicated identifier. If directory is not using caching, can ignore this.
28
+
Purge(ctx context.Context, i syntax.AtIdentifier) error
29
+
}
30
+
31
+
// Indicates that handle resolution failed. A wrapped error may provide more context. This is only returned when looking up a handle, not when looking up a DID.
32
+
var ErrHandleResolutionFailed = errors.New("handle resolution failed")
33
+
34
+
// Indicates that resolution process completed successfully, but handle does not exist. This is only returned when looking up a handle, not when looking up a DID.
35
+
var ErrHandleNotFound = errors.New("handle not found")
36
+
37
+
// Indicates that resolution process completed successfully, handle mapped to a different DID. This is only returned when looking up a handle, not when looking up a DID.
38
+
var ErrHandleMismatch = errors.New("handle/DID mismatch")
39
+
40
+
// Indicates that DID document did not include any handle ("alsoKnownAs"). This is only returned when looking up a handle, not when looking up a DID.
41
+
var ErrHandleNotDeclared = errors.New("DID document did not declare a handle")
42
+
43
+
// Handle top-level domain (TLD) is one of the special "Reserved" suffixes, and not allowed for atproto use
44
+
var ErrHandleReservedTLD = errors.New("handle top-level domain is disallowed")
45
+
46
+
// Indicates that resolution process completed successfully, but the DID does not exist.
47
+
var ErrDIDNotFound = errors.New("DID not found")
48
+
49
+
// Indicates that DID resolution process failed. A wrapped error may provide more context.
50
+
var ErrDIDResolutionFailed = errors.New("DID resolution failed")
51
+
52
+
// Indicates that DID document did not include a public key with the specified ID
53
+
var ErrKeyNotDeclared = errors.New("DID document did not declare a relevant public key")
54
+
55
+
// Handle was invalid, in a situation where a valid handle is required.
56
+
var ErrInvalidHandle = errors.New("Invalid Handle")
57
+
58
+
var DefaultPLCURL = "https://plc.directory"
59
+
60
+
// Returns a reasonable Directory implementation for applications
61
+
func DefaultDirectory() Directory {
62
+
base := BaseDirectory{
63
+
PLCURL: DefaultPLCURL,
64
+
HTTPClient: http.Client{
65
+
Timeout: time.Second * 10,
66
+
Transport: &http.Transport{
67
+
// would want this around 100ms for services doing lots of handle resolution. Impacts PLC connections as well, but not too bad.
68
+
IdleConnTimeout: time.Millisecond * 1000,
69
+
MaxIdleConns: 100,
70
+
},
71
+
},
72
+
Resolver: net.Resolver{
73
+
Dial: func(ctx context.Context, network, address string) (net.Conn, error) {
74
+
d := net.Dialer{Timeout: time.Second * 3}
75
+
return d.DialContext(ctx, network, address)
76
+
},
77
+
},
78
+
TryAuthoritativeDNS: true,
79
+
// primary Bluesky PDS instance only supports HTTP resolution method
80
+
SkipDNSDomainSuffixes: []string{".bsky.social"},
81
+
}
82
+
cached := NewCacheDirectory(&base, 250_000, time.Hour*24, time.Minute*2, time.Minute*5)
83
+
return &cached
84
+
}
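A minimal usage sketch (not in the diff) of the Directory interface and the sentinel errors defined above; the handle is a placeholder, and real callers would typically reuse one Directory instance rather than building one per lookup.

package main

import (
	"context"
	"errors"
	"fmt"

	"github.com/bluesky-social/indigo/atproto/identity"
	"github.com/bluesky-social/indigo/atproto/syntax"
)

func main() {
	dir := identity.DefaultDirectory()

	handle, err := syntax.ParseHandle("nonexistent.example.com")
	if err != nil {
		panic(err)
	}

	// resolves the handle, fetches the DID document, and verifies the
	// handle/DID pairing bi-directionally
	ident, err := dir.LookupHandle(context.Background(), handle)
	if errors.Is(err, identity.ErrHandleNotFound) {
		fmt.Println("handle does not exist")
		return
	}
	if err != nil {
		panic(err)
	}
	fmt.Println(ident.DID, ident.Handle)
}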
+8
-4
atproto/identity/handle.go
···
35
35
var dnsErr *net.DNSError
36
36
if errors.As(err, &dnsErr) {
37
37
if dnsErr.IsNotFound {
38
-
return "", ErrHandleNotFound
38
+
return "", fmt.Errorf("%w: %s", ErrHandleNotFound, handle)
39
39
}
40
40
}
41
41
if err != nil {
···
138
138
var dnsErr *net.DNSError
139
139
if errors.As(err, &dnsErr) {
140
140
if dnsErr.IsNotFound {
141
-
return "", fmt.Errorf("%w: DNS NXDOMAIN for %s", ErrHandleNotFound, handle)
141
+
return "", fmt.Errorf("%w: DNS NXDOMAIN for HTTP well-known resolution of %s", ErrHandleNotFound, handle)
142
142
}
143
143
}
144
144
return "", fmt.Errorf("%w: HTTP well-known request error: %w", ErrHandleResolutionFailed, err)
···
160
160
return "", fmt.Errorf("%w: HTTP well-known body read for %s: %w", ErrHandleResolutionFailed, handle, err)
161
161
}
162
162
line := strings.TrimSpace(string(b))
163
-
return syntax.ParseDID(line)
163
+
outDid, err := syntax.ParseDID(line)
164
+
if err != nil {
165
+
return outDid, fmt.Errorf("%w: invalid DID in HTTP well-known for %s", ErrHandleResolutionFailed, handle)
166
+
}
167
+
return outDid, err
164
168
}
165
169
166
170
func (d *BaseDirectory) ResolveHandle(ctx context.Context, handle syntax.Handle) (syntax.DID, error) {
···
169
173
var did syntax.DID
170
174
171
175
if handle.IsInvalidHandle() {
172
-
return "", fmt.Errorf("invalid handle")
176
+
return "", fmt.Errorf("can not resolve handle: %w", ErrInvalidHandle)
173
177
}
174
178
175
179
if !handle.AllowedTLD() {
-75
atproto/identity/identity.go
···
1
1
package identity
2
2
3
3
import (
4
-
"context"
5
-
"errors"
6
4
"fmt"
7
-
"net"
8
-
"net/http"
9
5
"net/url"
10
6
"strings"
11
-
"time"
12
7
13
8
"github.com/bluesky-social/indigo/atproto/crypto"
14
9
"github.com/bluesky-social/indigo/atproto/syntax"
15
10
16
11
"github.com/mr-tron/base58"
17
12
)
18
-
19
-
// API for doing account lookups by DID or handle, with bi-directional verification handled automatically. Almost all atproto services and clients should use an implementation of this interface instead of resolving handles or DIDs separately
20
-
//
21
-
// Handles which fail to resolve, or don't match DID alsoKnownAs, are an error. DIDs which resolve but the handle does not resolve back to the DID return an Identity where the Handle is the special `handle.invalid` value.
22
-
//
23
-
// Some example implementations of this interface could be:
24
-
// - basic direct resolution on every call
25
-
// - local in-memory caching layer to reduce network hits
26
-
// - API client, which just makes requests to PDS (or other remote service)
27
-
// - client for shared network cache (eg, Redis)
28
-
type Directory interface {
29
-
LookupHandle(ctx context.Context, h syntax.Handle) (*Identity, error)
30
-
LookupDID(ctx context.Context, d syntax.DID) (*Identity, error)
31
-
Lookup(ctx context.Context, i syntax.AtIdentifier) (*Identity, error)
32
-
33
-
// Flushes any cache of the indicated identifier. If directory is not using caching, can ignore this.
34
-
Purge(ctx context.Context, i syntax.AtIdentifier) error
35
-
}
36
-
37
-
// Indicates that handle resolution failed. A wrapped error may provide more context. This is only returned when looking up a handle, not when looking up a DID.
38
-
var ErrHandleResolutionFailed = errors.New("handle resolution failed")
39
-
40
-
// Indicates that resolution process completed successfully, but handle does not exist. This is only returned when looking up a handle, not when looking up a DID.
41
-
var ErrHandleNotFound = errors.New("handle not found")
42
-
43
-
// Indicates that resolution process completed successfully, handle mapped to a different DID. This is only returned when looking up a handle, not when looking up a DID.
44
-
var ErrHandleMismatch = errors.New("handle/DID mismatch")
45
-
46
-
// Indicates that DID document did not include any handle ("alsoKnownAs"). This is only returned when looking up a handle, not when looking up a DID.
47
-
var ErrHandleNotDeclared = errors.New("DID document did not declare a handle")
48
-
49
-
// Handle top-level domain (TLD) is one of the special "Reserved" suffixes, and not allowed for atproto use
50
-
var ErrHandleReservedTLD = errors.New("handle top-level domain is disallowed")
51
-
52
-
// Indicates that resolution process completed successfully, but the DID does not exist.
53
-
var ErrDIDNotFound = errors.New("DID not found")
54
-
55
-
// Indicates that DID resolution process failed. A wrapped error may provide more context.
56
-
var ErrDIDResolutionFailed = errors.New("DID resolution failed")
57
-
58
-
// Indicates that DID document did not include a public key with the specified ID
59
-
var ErrKeyNotDeclared = errors.New("DID document did not declare a relevant public key")
60
-
61
-
var DefaultPLCURL = "https://plc.directory"
62
-
63
-
// Returns a reasonable Directory implementation for applications
64
-
func DefaultDirectory() Directory {
65
-
base := BaseDirectory{
66
-
PLCURL: DefaultPLCURL,
67
-
HTTPClient: http.Client{
68
-
Timeout: time.Second * 10,
69
-
Transport: &http.Transport{
70
-
// would want this around 100ms for services doing lots of handle resolution. Impacts PLC connections as well, but not too bad.
71
-
IdleConnTimeout: time.Millisecond * 1000,
72
-
MaxIdleConns: 100,
73
-
},
74
-
},
75
-
Resolver: net.Resolver{
76
-
Dial: func(ctx context.Context, network, address string) (net.Conn, error) {
77
-
d := net.Dialer{Timeout: time.Second * 3}
78
-
return d.DialContext(ctx, network, address)
79
-
},
80
-
},
81
-
TryAuthoritativeDNS: true,
82
-
// primary Bluesky PDS instance only supports HTTP resolution method
83
-
SkipDNSDomainSuffixes: []string{".bsky.social"},
84
-
}
85
-
cached := NewCacheDirectory(&base, 250_000, time.Hour*24, time.Minute*2, time.Minute*5)
86
-
return &cached
87
-
}
88
13
89
14
// Represents an atproto identity. Could be a regular user account, or a service account (eg, feed generator)
90
15
type Identity struct {
+12
-12
atproto/identity/redisdir/redis_directory.go
···
57
57
func NewRedisDirectory(inner identity.Directory, redisURL string, hitTTL, errTTL, invalidHandleTTL time.Duration, lruSize int) (*RedisDirectory, error) {
58
58
opt, err := redis.ParseURL(redisURL)
59
59
if err != nil {
60
-
return nil, err
60
+
return nil, fmt.Errorf("could not configure redis identity cache: %w", err)
61
61
}
62
62
rdb := redis.NewClient(opt)
63
63
// check redis connection
64
64
_, err = rdb.Ping(context.TODO()).Result()
65
65
if err != nil {
66
-
return nil, err
66
+
return nil, fmt.Errorf("could not connect to redis identity cache: %w", err)
67
67
}
68
68
handleCache := cache.New(&cache.Options{
69
69
Redis: rdb,
···
117
117
})
118
118
if err != nil {
119
119
he.DID = nil
120
-
he.Err = fmt.Errorf("identity cache write: %w", err)
120
+
he.Err = fmt.Errorf("identity cache write failed: %w", err)
121
121
return he
122
122
}
123
123
return he
···
142
142
})
143
143
if err != nil {
144
144
he.DID = nil
145
-
he.Err = fmt.Errorf("identity cache write: %w", err)
145
+
he.Err = fmt.Errorf("identity cache write failed: %w", err)
146
146
return he
147
147
}
148
148
err = d.handleCache.Set(&cache.Item{
···
153
153
})
154
154
if err != nil {
155
155
he.DID = nil
156
-
he.Err = fmt.Errorf("identity cache write: %w", err)
156
+
he.Err = fmt.Errorf("identity cache write failed: %w", err)
157
157
return he
158
158
}
159
159
return he
···
161
161
162
162
func (d *RedisDirectory) ResolveHandle(ctx context.Context, h syntax.Handle) (syntax.DID, error) {
163
163
if h.IsInvalidHandle() {
164
-
return "", errors.New("invalid handle")
164
+
return "", fmt.Errorf("can not resolve handle: %w", identity.ErrInvalidHandle)
165
165
}
166
166
var entry handleEntry
167
167
err := d.handleCache.Get(ctx, redisDirPrefix+h.String(), &entry)
168
168
if err != nil && err != cache.ErrCacheMiss {
169
-
return "", fmt.Errorf("identity cache read: %w", err)
169
+
return "", fmt.Errorf("identity cache read failed: %w", err)
170
170
}
171
171
if err == nil && !d.isHandleStale(&entry) { // if no error...
172
172
handleCacheHits.Inc()
···
191
191
// The result should now be in the cache
192
192
err := d.handleCache.Get(ctx, redisDirPrefix+h.String(), entry)
193
193
if err != nil && err != cache.ErrCacheMiss {
194
-
return "", fmt.Errorf("identity cache read: %w", err)
194
+
return "", fmt.Errorf("identity cache read failed: %w", err)
195
195
}
196
196
if err == nil && !d.isHandleStale(&entry) { // if no error...
197
197
if entry.Err != nil {
···
251
251
})
252
252
if err != nil {
253
253
entry.Identity = nil
254
-
entry.Err = fmt.Errorf("identity cache write: %v", err)
254
+
entry.Err = fmt.Errorf("identity cache write failed: %w", err)
255
255
return entry
256
256
}
257
257
if he != nil {
···
263
263
})
264
264
if err != nil {
265
265
entry.Identity = nil
266
-
entry.Err = fmt.Errorf("identity cache write: %v", err)
266
+
entry.Err = fmt.Errorf("identity cache write failed: %w", err)
267
267
return entry
268
268
}
269
269
}
···
279
279
var entry identityEntry
280
280
err := d.identityCache.Get(ctx, redisDirPrefix+did.String(), &entry)
281
281
if err != nil && err != cache.ErrCacheMiss {
282
-
return nil, false, fmt.Errorf("identity cache read: %v", err)
282
+
return nil, false, fmt.Errorf("identity cache read failed: %w", err)
283
283
}
284
284
if err == nil && !d.isIdentityStale(&entry) { // if no error...
285
285
identityCacheHits.Inc()
···
298
298
// The result should now be in the cache
299
299
err = d.identityCache.Get(ctx, redisDirPrefix+did.String(), &entry)
300
300
if err != nil && err != cache.ErrCacheMiss {
301
-
return nil, false, fmt.Errorf("identity cache read: %v", err)
301
+
return nil, false, fmt.Errorf("identity cache read failed: %w", err)
302
302
}
303
303
if err == nil && !d.isIdentityStale(&entry) { // if no error...
304
304
return entry.Identity, false, entry.Err
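A minimal sketch (not in the diff) of wrapping the default directory with this Redis-backed cache via the constructor shown above; the Redis URL and TTL values are placeholders, and the redisdir import path is assumed from the file's location in the tree.

package main

import (
	"fmt"
	"time"

	"github.com/bluesky-social/indigo/atproto/identity"
	"github.com/bluesky-social/indigo/atproto/identity/redisdir"
)

func main() {
	base := identity.DefaultDirectory()

	// arguments: hit TTL, error TTL, invalid-handle TTL, then in-process LRU size
	dir, err := redisdir.NewRedisDirectory(base, "redis://localhost:6379/0", 24*time.Hour, 2*time.Minute, 5*time.Minute, 250_000)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%T\n", dir)
}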
+1
-1
atproto/lexicon/language.go
+21
atproto/syntax/cmd/atp-syntax/main.go
···
22
22
ArgsUsage: "<tid>",
23
23
Action: runParseTID,
24
24
},
25
+
&cli.Command{
26
+
Name: "parse-did",
27
+
Usage: "parse a DID",
28
+
ArgsUsage: "<did>",
29
+
Action: runParseDID,
30
+
},
25
31
}
26
32
h := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug})
27
33
slog.SetDefault(slog.New(h))
···
43
49
44
50
return nil
45
51
}
52
+
53
+
func runParseDID(cctx *cli.Context) error {
54
+
s := cctx.Args().First()
55
+
if s == "" {
56
+
return fmt.Errorf("need to provide identifier as an argument")
57
+
}
58
+
59
+
did, err := syntax.ParseDID(s)
60
+
if err != nil {
61
+
return err
62
+
}
63
+
fmt.Printf("%s\n", did)
64
+
65
+
return nil
66
+
}
+22
atproto/syntax/did.go
···
14
14
type DID string
15
15
16
16
var didRegex = regexp.MustCompile(`^did:[a-z]+:[a-zA-Z0-9._:%-]*[a-zA-Z0-9._-]$`)
17
+
var plcChars = ""
18
+
19
+
func isASCIIAlphaNum(c rune) bool {
20
+
if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') {
21
+
return true
22
+
}
23
+
return false
24
+
}
17
25
18
26
func ParseDID(raw string) (DID, error) {
27
+
// fast-path for did:plc, avoiding regex
28
+
if len(raw) == 32 && strings.HasPrefix(raw, "did:plc:") {
29
+
// NOTE: this doesn't really check base32, just broader alphanumeric. Might pass invalid PLC DIDs, but they still have overall valid DID syntax
30
+
isPlc := true
31
+
for _, c := range raw[8:32] {
32
+
if !isASCIIAlphaNum(c) {
33
+
isPlc = false
34
+
break
35
+
}
36
+
}
37
+
if isPlc {
38
+
return DID(raw), nil
39
+
}
40
+
}
19
41
if raw == "" {
20
42
return "", errors.New("expected DID, got empty string")
21
43
}
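An illustrative check (not in the diff) that the new did:plc fast path and the existing regex path accept the same well-formed identifiers; both DIDs below are made up.

package main

import (
	"fmt"

	"github.com/bluesky-social/indigo/atproto/syntax"
)

func main() {
	// 8-character "did:plc:" prefix plus 24 alphanumeric characters is 32 bytes
	// total, so this takes the fast path and skips the regex entirely
	plc, err := syntax.ParseDID("did:plc:abcdefghijklmnop234567aa")
	fmt.Println(plc, err)

	// any other method (or a malformed PLC identifier) falls through to the regex
	web, err := syntax.ParseDID("did:web:example.com")
	fmt.Println(web, err)
}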
+26
atproto/syntax/path.go
···
1
+
package syntax
2
+
3
+
import (
4
+
"errors"
5
+
"fmt"
6
+
"strings"
7
+
)
8
+
9
+
// Parses an atproto repo path string into "collection" (NSID) and record key parts.
10
+
//
11
+
// Does not return partial success: either both collection and record key are complete (and error is nil), or both are empty string (and error is not nil)
12
+
func ParseRepoPath(raw string) (NSID, RecordKey, error) {
13
+
parts := strings.SplitN(raw, "/", 3)
14
+
if len(parts) != 2 {
15
+
return "", "", errors.New("expected path to have two parts, separated by single slash")
16
+
}
17
+
nsid, err := ParseNSID(parts[0])
18
+
if err != nil {
19
+
return "", "", fmt.Errorf("collection part of path not a valid NSID: %w", err)
20
+
}
21
+
rkey, err := ParseRecordKey(parts[1])
22
+
if err != nil {
23
+
return "", "", fmt.Errorf("record key part of path not valid: %w", err)
24
+
}
25
+
return nsid, rkey, nil
26
+
}
+41
atproto/syntax/path_test.go
···
1
+
package syntax
2
+
3
+
import (
4
+
"testing"
5
+
6
+
"github.com/stretchr/testify/assert"
7
+
)
8
+
9
+
func TestRepoPath(t *testing.T) {
10
+
assert := assert.New(t)
11
+
12
+
testValid := [][]string{
13
+
{"app.bsky.feed.post/asdf", "app.bsky.feed.post", "asdf"},
14
+
}
15
+
16
+
testErr := []string{
17
+
"",
18
+
"/",
19
+
"/app.bsky.feed.post/asdf",
20
+
"/asdf",
21
+
"./app.bsky.feed.post",
22
+
"blob/asdf",
23
+
"app.bsky.feed.post/",
24
+
"app.bsky.feed.post/.",
25
+
"app.bsky.feed.post/!",
26
+
}
27
+
28
+
for _, parts := range testValid {
29
+
nsid, rkey, err := ParseRepoPath(parts[0])
30
+
assert.NoError(err)
31
+
assert.Equal(parts[1], nsid.String())
32
+
assert.Equal(parts[2], rkey.String())
33
+
}
34
+
35
+
for _, raw := range testErr {
36
+
nsid, rkey, err := ParseRepoPath(raw)
37
+
assert.Error(err)
38
+
assert.Equal("", nsid.String())
39
+
assert.Equal("", rkey.String())
40
+
}
41
+
}
+3
-3
automod/consumer/firehose.go
···
116
116
fc.Logger.Info("hepa scheduler configured", "scheduler", "autoscaling", "initial", scaleSettings.Concurrency, "max", scaleSettings.MaxConcurrency)
117
117
}
118
118
119
-
return events.HandleRepoStream(ctx, con, scheduler)
119
+
return events.HandleRepoStream(ctx, con, scheduler, fc.Logger)
120
120
}
121
121
122
122
// NOTE: for now, this function basically never errors, just logs and returns nil. Should think through error processing better.
···
144
144
145
145
for _, op := range evt.Ops {
146
146
logger = logger.With("eventKind", op.Action, "path", op.Path)
147
-
collection, rkey, err := splitRepoPath(op.Path)
147
+
collection, rkey, err := syntax.ParseRepoPath(op.Path)
148
148
if err != nil {
149
-
logger.Error("invalid path in repo op")
149
+
logger.Error("invalid path in repo op", "err", err)
150
150
return nil
151
151
}
152
152
+1
automod/consumer/ozone.go
-25
automod/consumer/util.go
···
1
-
package consumer
2
-
3
-
import (
4
-
"fmt"
5
-
"strings"
6
-
7
-
"github.com/bluesky-social/indigo/atproto/syntax"
8
-
)
9
-
10
-
// TODO: move this to a "ParsePath" helper in syntax package?
11
-
func splitRepoPath(path string) (syntax.NSID, syntax.RecordKey, error) {
12
-
parts := strings.SplitN(path, "/", 3)
13
-
if len(parts) != 2 {
14
-
return "", "", fmt.Errorf("invalid record path: %s", path)
15
-
}
16
-
collection, err := syntax.ParseNSID(parts[0])
17
-
if err != nil {
18
-
return "", "", err
19
-
}
20
-
rkey, err := syntax.ParseRecordKey(parts[1])
21
-
if err != nil {
22
-
return "", "", err
23
-
}
24
-
return collection, rkey, nil
25
-
}
+4
-4
automod/engine/circuit_breaker_test.go
···
44
44
p1cbor := p1buf.Bytes()
45
45
46
46
// generate double the quota of events; expect to only count the quota worth of actions
47
-
for i := 0; i < 2*QuotaModTakedownDay; i++ {
47
+
for i := 0; i < 2*eng.Config.QuotaModTakedownDay; i++ {
48
48
ident := identity.Identity{
49
49
DID: syntax.DID(fmt.Sprintf("did:plc:abc%d", i)),
50
50
Handle: syntax.Handle("handle.example.com"),
···
63
63
64
64
takedowns, err := eng.Counters.GetCount(ctx, "automod-quota", "takedown", countstore.PeriodDay)
65
65
assert.NoError(err)
66
-
assert.Equal(QuotaModTakedownDay, takedowns)
66
+
assert.Equal(eng.Config.QuotaModTakedownDay, takedowns)
67
67
68
68
reports, err := eng.Counters.GetCount(ctx, "automod-quota", "report", countstore.PeriodDay)
69
69
assert.NoError(err)
···
89
89
p1cbor := p1buf.Bytes()
90
90
91
91
// generate double the quota of events; expect to only count the quota worth of actions
92
-
for i := 0; i < 2*QuotaModReportDay; i++ {
92
+
for i := 0; i < 2*eng.Config.QuotaModReportDay; i++ {
93
93
ident := identity.Identity{
94
94
DID: syntax.DID(fmt.Sprintf("did:plc:abc%d", i)),
95
95
Handle: syntax.Handle("handle.example.com"),
···
112
112
113
113
reports, err := eng.Counters.GetCount(ctx, "automod-quota", "report", countstore.PeriodDay)
114
114
assert.NoError(err)
115
-
assert.Equal(QuotaModReportDay, reports)
115
+
assert.Equal(eng.Config.QuotaModReportDay, reports)
116
116
}
-12
automod/engine/effects.go
···
2
2
3
3
import (
4
4
"sync"
5
-
"time"
6
-
)
7
-
8
-
var (
9
-
// time period within which automod will not re-report an account for the same reasonType
10
-
ReportDupePeriod = 1 * 24 * time.Hour
11
-
// number of reports automod can file per day, for all subjects and types combined (circuit breaker)
12
-
QuotaModReportDay = 2000
13
-
// number of takedowns automod can action per day, for all subjects combined (circuit breaker)
14
-
QuotaModTakedownDay = 200
15
-
// number of misc actions automod can do per day, for all subjects combined (circuit breaker)
16
-
QuotaModActionDay = 1000
17
5
)
18
6
19
7
type CounterRef struct {
+8
automod/engine/engine.go
···
52
52
type EngineConfig struct {
53
53
// if enabled, account metadata is not hydrated for every event by default
54
54
SkipAccountMeta bool
55
+
// time period within which automod will not re-report an account for the same reasonType
56
+
ReportDupePeriod time.Duration
57
+
// number of reports automod can file per day, for all subjects and types combined (circuit breaker)
58
+
QuotaModReportDay int
59
+
// number of takedowns automod can action per day, for all subjects combined (circuit breaker)
60
+
QuotaModTakedownDay int
61
+
// number of misc actions automod can do per day, for all subjects combined (circuit breaker)
62
+
QuotaModActionDay int
55
63
}
56
64
57
65
// Entrypoint for external code pushing #identity events in to the engine.
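A minimal sketch (not part of the diff) of populating the new per-engine knobs; the values mirror the package-level defaults removed from effects.go, and the import path is the automod engine package being modified here.

package main

import (
	"fmt"
	"time"

	"github.com/bluesky-social/indigo/automod/engine"
)

func main() {
	cfg := engine.EngineConfig{
		ReportDupePeriod:    24 * time.Hour, // don't re-report the same reasonType within a day
		QuotaModReportDay:   2000,
		QuotaModTakedownDay: 200,
		QuotaModActionDay:   1000,
	}
	fmt.Printf("%+v\n", cfg)
}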
+6
-4
automod/engine/fetch_account_meta.go
···
139
139
ap.AccountTags = dedupeStrings(rd.Moderation.SubjectStatus.Tags)
140
140
if rd.Moderation.SubjectStatus.ReviewState != nil {
141
141
switch *rd.Moderation.SubjectStatus.ReviewState {
142
-
case "#reviewOpen":
142
+
case "tools.ozone.moderation.defs#reviewOpen":
143
143
ap.ReviewState = ReviewStateOpen
144
-
case "#reviewEscalated":
144
+
case "tools.ozone.moderation.defs#reviewEscalated":
145
145
ap.ReviewState = ReviewStateEscalated
146
-
case "#reviewClosed":
146
+
case "tools.ozone.moderation.defs#reviewClosed":
147
147
ap.ReviewState = ReviewStateClosed
148
-
case "#reviewNonde":
148
+
case "tools.ozone.moderation.defs#reviewNone":
149
149
ap.ReviewState = ReviewStateNone
150
+
default:
151
+
logger.Warn("unexpected ozone moderation review state", "state", rd.Moderation.SubjectStatus.ReviewState, "did", ident.DID)
150
152
}
151
153
}
152
154
}
+28
-5
automod/engine/persisthelpers.go
···
98
98
if err != nil {
99
99
return nil, fmt.Errorf("checking report action quota: %w", err)
100
100
}
101
-
if c >= QuotaModReportDay {
101
+
102
+
quotaModReportDay := eng.Config.QuotaModReportDay
103
+
if quotaModReportDay == 0 {
104
+
quotaModReportDay = 10000
105
+
}
106
+
if c >= quotaModReportDay {
102
107
eng.Logger.Warn("CIRCUIT BREAKER: automod reports")
103
108
return []ModReport{}, nil
104
109
}
···
117
122
if err != nil {
118
123
return false, fmt.Errorf("checking takedown action quota: %w", err)
119
124
}
120
-
if c >= QuotaModTakedownDay {
125
+
quotaModTakedownDay := eng.Config.QuotaModTakedownDay
126
+
if quotaModTakedownDay == 0 {
127
+
quotaModTakedownDay = 200
128
+
}
129
+
if c >= quotaModTakedownDay {
121
130
eng.Logger.Warn("CIRCUIT BREAKER: automod takedowns")
122
131
return false, nil
123
132
}
···
137
146
if err != nil {
138
147
return false, fmt.Errorf("checking mod action quota: %w", err)
139
148
}
140
-
if c >= QuotaModActionDay {
149
+
quotaModActionDay := eng.Config.QuotaModActionDay
150
+
if quotaModActionDay == 0 {
151
+
quotaModActionDay = 2000
152
+
}
153
+
if c >= quotaModActionDay {
141
154
eng.Logger.Warn("CIRCUIT BREAKER: automod action")
142
155
return false, nil
143
156
}
···
169
182
false, // hasComment bool
170
183
false, // includeAllUserRecords bool
171
184
5, // limit int64
185
+
nil, // policies []string
172
186
nil, // removedLabels []string
173
187
nil, // removedTags []string
174
188
nil, // reportTypes []string
···
191
205
if err != nil {
192
206
return false, err
193
207
}
194
-
if time.Since(created.Time()) > ReportDupePeriod {
208
+
reportDupePeriod := eng.Config.ReportDupePeriod
209
+
if reportDupePeriod == 0 {
210
+
reportDupePeriod = 1 * 24 * time.Hour
211
+
}
212
+
if time.Since(created.Time()) > reportDupePeriod {
195
213
continue
196
214
}
197
215
···
246
264
false, // hasComment bool
247
265
false, // includeAllUserRecords bool
248
266
5, // limit int64
267
+
nil, // policies []string
249
268
nil, // removedLabels []string
250
269
nil, // removedTags []string
251
270
nil, // reportTypes []string
···
267
286
if err != nil {
268
287
return false, err
269
288
}
270
-
if time.Since(created.Time()) > ReportDupePeriod {
289
+
reportDupePeriod := eng.Config.ReportDupePeriod
290
+
if reportDupePeriod == 0 {
291
+
reportDupePeriod = 1 * 24 * time.Hour
292
+
}
293
+
if time.Since(created.Time()) > reportDupePeriod {
271
294
continue
272
295
}
273
296
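The quota checks above repeat a "zero value means use the built-in default" pattern three times; a hypothetical generic helper (not in the diff) could express the same fallback once, shown here only as a design sketch.

package main

import "fmt"

// orDefault returns fallback when v is the zero value for its type.
func orDefault[T comparable](v, fallback T) T {
	var zero T
	if v == zero {
		return fallback
	}
	return v
}

func main() {
	fmt.Println(orDefault(0, 200))    // unset quota falls back to 200
	fmt.Println(orDefault(2500, 200)) // explicit quota wins: 2500
}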
+1
-1
automod/engine/slack.go
+1
-3
automod/flagstore/flagstore_mem.go
+1
-3
automod/helpers/bsky.go
···
11
11
12
12
func ExtractHashtagsPost(post *appbsky.FeedPost) []string {
13
13
var tags []string
14
-
for _, tag := range post.Tags {
15
-
tags = append(tags, tag)
16
-
}
14
+
tags = append(tags, post.Tags...)
17
15
for _, facet := range post.Facets {
18
16
for _, feat := range facet.Features {
19
17
if feat.RichtextFacet_Tag != nil {
+1
-1
automod/rules/harassment.go
···
130
130
131
131
if count > 5 {
132
132
//c.AddRecordFlag("trivial-harassing-post")
133
-
c.ReportAccount(automod.ReportReasonOther, fmt.Sprintf("possible targetted harassment (also labeled; remove label if this isn't harassment!)"))
133
+
c.ReportAccount(automod.ReportReasonOther, "possible targetted harassment (also labeled; remove label if this isn't harassment!)")
134
134
c.AddAccountLabel("!hide")
135
135
c.Notify("slack")
136
136
}
+1
-2
automod/rules/nostr.go
···
1
1
package rules
2
2
3
3
import (
4
-
"fmt"
5
4
"strings"
6
5
"time"
7
6
···
37
36
return nil
38
37
}
39
38
40
-
c.ReportAccount(automod.ReportReasonOther, fmt.Sprintf("likely nostr spam account (also labeled; remove label if this isn't spam!)"))
39
+
c.ReportAccount(automod.ReportReasonOther, "likely nostr spam account (also labeled; remove label if this isn't spam!)")
41
40
c.AddAccountLabel("!hide")
42
41
c.Notify("slack")
43
42
return nil
+1
-2
automod/rules/promo.go
···
1
1
package rules
2
2
3
3
import (
4
-
"fmt"
5
4
"net/url"
6
5
"strings"
7
6
"time"
···
54
53
uniqueReplies := c.GetCountDistinct("reply-to", did, countstore.PeriodDay)
55
54
if uniqueReplies >= 10 {
56
55
c.AddAccountFlag("promo-multi-reply")
57
-
c.ReportAccount(automod.ReportReasonSpam, fmt.Sprintf("possible aggressive self-promotion"))
56
+
c.ReportAccount(automod.ReportReasonSpam, "possible aggressive self-promotion")
58
57
c.Notify("slack")
59
58
}
60
59
+2
-2
automod/rules/quick.go
···
29
29
}
30
30
if strings.Contains(*profile.Description, "🏈🍕🌀") {
31
31
c.AddAccountFlag("profile-bot-string")
32
-
c.ReportAccount(automod.ReportReasonSpam, fmt.Sprintf("possible bot based on string in profile"))
32
+
c.ReportAccount(automod.ReportReasonSpam, "possible bot based on string in profile")
33
33
c.Notify("slack")
34
34
return nil
35
35
}
···
89
89
return nil
90
90
}
91
91
92
-
c.ReportAccount(automod.ReportReasonOther, fmt.Sprintf("trivial spam account (also labeled; remove label if this isn't spam!)"))
92
+
c.ReportAccount(automod.ReportReasonOther, "trivial spam account (also labeled; remove label if this isn't spam!)")
93
93
c.AddAccountLabel("!hide")
94
94
c.Notify("slack")
95
95
return nil
+68
-33
backfill/backfill.go
···
12
12
"time"
13
13
14
14
"github.com/bluesky-social/indigo/api/atproto"
15
+
"github.com/bluesky-social/indigo/atproto/identity"
16
+
"github.com/bluesky-social/indigo/atproto/syntax"
15
17
"github.com/bluesky-social/indigo/repo"
16
18
"github.com/bluesky-social/indigo/repomgr"
19
+
17
20
"github.com/ipfs/go-cid"
18
21
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
19
22
"go.opentelemetry.io/otel"
···
71
74
ParallelRecordCreates int
72
75
// Prefix match for records to backfill i.e. app.bsky.feed.app/
73
76
// If empty, all records will be backfilled
74
-
NSIDFilter string
75
-
CheckoutPath string
77
+
NSIDFilter string
78
+
RelayHost string
76
79
77
80
syncLimiter *rate.Limiter
78
81
···
80
83
magicHeaderVal string
81
84
82
85
stop chan chan struct{}
86
+
87
+
Directory identity.Directory
83
88
}
84
89
85
90
var (
···
110
115
ParallelRecordCreates int
111
116
NSIDFilter string
112
117
SyncRequestsPerSecond int
113
-
CheckoutPath string
118
+
RelayHost string
114
119
}
115
120
116
121
func DefaultBackfillOptions() *BackfillOptions {
···
119
124
ParallelRecordCreates: 100,
120
125
NSIDFilter: "",
121
126
SyncRequestsPerSecond: 2,
122
-
CheckoutPath: "https://bsky.network/xrpc/com.atproto.sync.getRepo",
127
+
RelayHost: "https://bsky.network",
123
128
}
124
129
}
125
130
···
135
140
if opts == nil {
136
141
opts = DefaultBackfillOptions()
137
142
}
143
+
144
+
// Convert wss:// or ws:// to https:// or http://
145
+
if strings.HasPrefix(opts.RelayHost, "wss://") {
146
+
opts.RelayHost = "https://" + opts.RelayHost[6:]
147
+
} else if strings.HasPrefix(opts.RelayHost, "ws://") {
148
+
opts.RelayHost = "http://" + opts.RelayHost[5:]
149
+
}
150
+
138
151
return &Backfiller{
139
152
Name: name,
140
153
Store: store,
···
145
158
ParallelRecordCreates: opts.ParallelRecordCreates,
146
159
NSIDFilter: opts.NSIDFilter,
147
160
syncLimiter: rate.NewLimiter(rate.Limit(opts.SyncRequestsPerSecond), 1),
148
-
CheckoutPath: opts.CheckoutPath,
161
+
RelayHost: opts.RelayHost,
149
162
stop: make(chan chan struct{}, 1),
163
+
Directory: identity.DefaultDirectory(),
150
164
}
151
165
}
152
166
···
292
306
err error
293
307
}
294
308
295
-
// BackfillRepo backfills a repo
296
-
func (b *Backfiller) BackfillRepo(ctx context.Context, job Job) (string, error) {
297
-
ctx, span := tracer.Start(ctx, "BackfillRepo")
298
-
defer span.End()
309
+
// Fetches a repo CAR file over HTTP from the indicated host. If successful, parses the CAR and returns repo.Repo
310
+
func (b *Backfiller) fetchRepo(ctx context.Context, did, since, host string) (*repo.Repo, error) {
311
+
url := fmt.Sprintf("%s/xrpc/com.atproto.sync.getRepo?did=%s", host, did)
299
312
300
-
start := time.Now()
301
-
302
-
repoDid := job.Repo()
303
-
304
-
log := slog.With("source", "backfiller_backfill_repo", "repo", repoDid)
305
-
if job.RetryCount() > 0 {
306
-
log = log.With("retry_count", job.RetryCount())
307
-
}
308
-
log.Info(fmt.Sprintf("processing backfill for %s", repoDid))
309
-
310
-
url := fmt.Sprintf("%s?did=%s", b.CheckoutPath, repoDid)
311
-
312
-
if job.Rev() != "" {
313
-
url = url + fmt.Sprintf("&since=%s", job.Rev())
313
+
if since != "" {
314
+
url = url + fmt.Sprintf("&since=%s", since)
314
315
}
315
316
316
317
// GET and CAR decode the body
···
320
321
}
321
322
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
322
323
if err != nil {
323
-
state := fmt.Sprintf("failed (create request: %s)", err.Error())
324
-
return state, fmt.Errorf("failed to create request: %w", err)
324
+
return nil, fmt.Errorf("failed to create request: %w", err)
325
325
}
326
326
327
327
req.Header.Set("Accept", "application/vnd.ipld.car")
···
334
334
335
335
resp, err := client.Do(req)
336
336
if err != nil {
337
-
state := fmt.Sprintf("failed (do request: %s)", err.Error())
338
-
return state, fmt.Errorf("failed to send request: %w", err)
337
+
return nil, fmt.Errorf("failed to send request: %w", err)
339
338
}
340
339
341
340
if resp.StatusCode != http.StatusOK {
···
345
344
} else {
346
345
reason = resp.Status
347
346
}
348
-
state := fmt.Sprintf("failed (%s)", reason)
349
-
return state, fmt.Errorf("failed to get repo: %s", reason)
347
+
return nil, fmt.Errorf("failed to get repo: %s", reason)
350
348
}
351
349
352
350
instrumentedReader := instrumentedReader{
···
356
354
357
355
defer instrumentedReader.Close()
358
356
359
-
r, err := repo.ReadRepoFromCar(ctx, instrumentedReader)
357
+
repo, err := repo.ReadRepoFromCar(ctx, instrumentedReader)
360
358
if err != nil {
361
-
state := "failed (couldn't read repo CAR from response body)"
362
-
return state, fmt.Errorf("failed to read repo from car: %w", err)
359
+
return nil, fmt.Errorf("failed to parse repo from CAR file: %w", err)
360
+
}
361
+
return repo, nil
362
+
}
363
+
364
+
// BackfillRepo backfills a repo
365
+
func (b *Backfiller) BackfillRepo(ctx context.Context, job Job) (string, error) {
366
+
ctx, span := tracer.Start(ctx, "BackfillRepo")
367
+
defer span.End()
368
+
369
+
start := time.Now()
370
+
371
+
repoDID := job.Repo()
372
+
373
+
log := slog.With("source", "backfiller_backfill_repo", "repo", repoDID)
374
+
if job.RetryCount() > 0 {
375
+
log = log.With("retry_count", job.RetryCount())
376
+
}
377
+
log.Info(fmt.Sprintf("processing backfill for %s", repoDID))
378
+
379
+
// first try with Relay endpoint
380
+
r, err := b.fetchRepo(ctx, repoDID, job.Rev(), b.RelayHost)
381
+
if err != nil {
382
+
slog.Warn("repo CAR fetch from relay failed", "did", repoDID, "since", job.Rev(), "relayHost", b.RelayHost, "err", err)
383
+
// fallback to direct PDS fetch
384
+
ident, err := b.Directory.LookupDID(ctx, syntax.DID(repoDID))
385
+
if err != nil {
386
+
return "failed resolving DID to PDS repo", fmt.Errorf("resolving DID for PDS repo fetch: %w", err)
387
+
}
388
+
pdsHost := ident.PDSEndpoint()
389
+
if pdsHost == "" {
390
+
return "DID document missing PDS endpoint", fmt.Errorf("no PDS endpoint for DID: %s", repoDID)
391
+
}
392
+
r, err = b.fetchRepo(ctx, repoDID, job.Rev(), pdsHost)
393
+
if err != nil {
394
+
slog.Warn("repo CAR fetch from PDS failed", "did", repoDID, "since", job.Rev(), "pdsHost", pdsHost, "err", err)
395
+
return "repo CAR fetch from PDS failed", err
396
+
}
397
+
slog.Info("repo CAR fetch from PDS successful", "did", repoDID, "since", job.Rev(), "pdsHost", pdsHost, "err", err)
363
398
}
364
399
365
400
numRecords := 0
···
396
431
397
432
raw := blk.RawData()
398
433
399
-
err = b.HandleCreateRecord(ctx, repoDid, rev, item.recordPath, &raw, &item.nodeCid)
434
+
err = b.HandleCreateRecord(ctx, repoDID, rev, item.recordPath, &raw, &item.nodeCid)
400
435
if err != nil {
401
436
recordResults <- recordResult{recordPath: item.recordPath, err: fmt.Errorf("failed to handle create record: %w", err)}
402
437
continue
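A standalone sketch (not in the diff) of the RelayHost normalization NewBackfiller now performs, so a websocket relay URL can be reused for HTTP getRepo fetches; the function name here is hypothetical, pulled out only to illustrate the rewrite.

package main

import (
	"fmt"
	"strings"
)

// normalizeRelayHost mirrors the prefix rewrite done in NewBackfiller:
// wss:// becomes https:// and ws:// becomes http://.
func normalizeRelayHost(host string) string {
	if strings.HasPrefix(host, "wss://") {
		return "https://" + host[6:]
	}
	if strings.HasPrefix(host, "ws://") {
		return "http://" + host[5:]
	}
	return host
}

func main() {
	fmt.Println(normalizeRelayHost("wss://bsky.network"))   // https://bsky.network
	fmt.Println(normalizeRelayHost("https://bsky.network")) // unchanged
}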
+4
-1
bgs/admin.go
···
264
264
return err
265
265
}
266
266
267
+
// don't care if this errors, but we should try to disconnect something we just blocked
268
+
_ = bgs.slurper.KillUpstreamConnection(host, false)
269
+
267
270
return e.JSON(200, map[string]any{
268
271
"success": "true",
269
272
})
···
484
487
ctx := context.Background()
485
488
err := bgs.ResyncPDS(ctx, pds)
486
489
if err != nil {
487
-
log.Errorw("failed to resync PDS", "err", err, "pds", pds.Host)
490
+
log.Error("failed to resync PDS", "err", err, "pds", pds.Host)
488
491
}
489
492
}()
490
493
+172
-86
bgs/bgs.go
···
6
6
"encoding/json"
7
7
"errors"
8
8
"fmt"
9
+
"log/slog"
9
10
"net"
10
11
"net/http"
11
12
_ "net/http/pprof"
12
13
"net/url"
14
+
"reflect"
13
15
"strconv"
14
16
"strings"
15
17
"sync"
···
27
29
"github.com/bluesky-social/indigo/models"
28
30
"github.com/bluesky-social/indigo/repomgr"
29
31
"github.com/bluesky-social/indigo/xrpc"
32
+
lru "github.com/hashicorp/golang-lru/v2"
30
33
"golang.org/x/sync/semaphore"
31
34
"golang.org/x/time/rate"
32
35
33
36
"github.com/gorilla/websocket"
34
37
"github.com/ipfs/go-cid"
35
38
ipld "github.com/ipfs/go-ipld-format"
36
-
logging "github.com/ipfs/go-log"
37
39
"github.com/labstack/echo/v4"
38
40
"github.com/labstack/echo/v4/middleware"
39
41
promclient "github.com/prometheus/client_golang/prometheus"
···
44
46
"gorm.io/gorm"
45
47
)
46
48
47
-
var log = logging.Logger("bgs")
48
49
var tracer = otel.Tracer("bgs")
49
50
50
51
// serverListenerBootTimeout is how long to wait for the requested server socket
···
87
88
88
89
// Management of Compaction
89
90
compactor *Compactor
91
+
92
+
// User cache
93
+
userCache *lru.Cache[string, *User]
94
+
95
+
// nextCrawlers gets forwarded POST /xrpc/com.atproto.sync.requestCrawl
96
+
nextCrawlers []*url.URL
97
+
httpClient http.Client
98
+
99
+
log *slog.Logger
90
100
}
91
101
92
102
type PDSResync struct {
···
113
123
ConcurrencyPerPDS int64
114
124
MaxQueuePerPDS int64
115
125
NumCompactionWorkers int
126
+
127
+
// NextCrawlers gets forwarded POST /xrpc/com.atproto.sync.requestCrawl
128
+
NextCrawlers []*url.URL
116
129
}
117
130
118
131
func DefaultBGSConfig() *BGSConfig {
···
136
149
db.AutoMigrate(models.PDS{})
137
150
db.AutoMigrate(models.DomainBan{})
138
151
152
+
uc, _ := lru.New[string, *User](1_000_000)
153
+
139
154
bgs := &BGS{
140
155
Index: ix,
141
156
db: db,
···
151
166
consumers: make(map[uint64]*SocketConsumer),
152
167
153
168
pdsResyncs: make(map[uint]*PDSResync),
169
+
170
+
userCache: uc,
171
+
172
+
log: slog.Default().With("system", "bgs"),
154
173
}
155
174
156
175
ix.CreateExternalUser = bgs.createExternalUser
···
177
196
compactor.Start(bgs)
178
197
bgs.compactor = compactor
179
198
199
+
bgs.nextCrawlers = config.NextCrawlers
200
+
bgs.httpClient.Timeout = time.Second * 5
201
+
180
202
return bgs, nil
181
203
}
182
204
···
226
248
act, err := bgs.Index.GetUserOrMissing(ctx, did)
227
249
if err != nil {
228
250
w.WriteHeader(500)
229
-
log.Errorf("failed to get user: %s", err)
251
+
bgs.log.Error("failed to get user", "err", err)
230
252
return
231
253
}
232
254
233
255
if err := bgs.Index.Crawler.Crawl(ctx, act); err != nil {
234
256
w.WriteHeader(500)
235
-
log.Errorf("failed to add user to crawler: %s", err)
257
+
bgs.log.Error("failed to add user to crawler", "err", err)
236
258
return
237
259
}
238
260
})
···
317
339
if err2 := ctx.JSON(err.Code, map[string]any{
318
340
"error": err.Message,
319
341
}); err2 != nil {
320
-
log.Errorf("Failed to write http error: %s", err2)
342
+
bgs.log.Error("Failed to write http error", "err", err2)
321
343
}
322
344
default:
323
345
sendHeader := true
···
325
347
sendHeader = false
326
348
}
327
349
328
-
log.Warnf("HANDLER ERROR: (%s) %s", ctx.Path(), err)
350
+
bgs.log.Warn("HANDLER ERROR: (%s) %s", ctx.Path(), err)
329
351
330
352
if strings.HasPrefix(ctx.Path(), "/admin/") {
331
353
ctx.JSON(500, map[string]any{
···
418
440
419
441
func (bgs *BGS) HandleHealthCheck(c echo.Context) error {
420
442
if err := bgs.db.Exec("SELECT 1").Error; err != nil {
421
-
log.Errorf("healthcheck can't connect to database: %v", err)
443
+
bgs.log.Error("healthcheck can't connect to database", "err", err)
422
444
return c.JSON(500, HealthStatus{Status: "error", Message: "can't connect to database"})
423
445
} else {
424
446
return c.JSON(200, HealthStatus{Status: "ok"})
···
521
543
522
544
// UpstreamStatus is the state of the user as reported by the upstream PDS
523
545
UpstreamStatus string `gorm:"index"`
546
+
547
+
lk sync.Mutex
548
+
}
549
+
550
+
func (u *User) SetTakenDown(v bool) {
551
+
u.lk.Lock()
552
+
defer u.lk.Unlock()
553
+
u.TakenDown = v
554
+
}
555
+
556
+
func (u *User) GetTakenDown() bool {
557
+
u.lk.Lock()
558
+
defer u.lk.Unlock()
559
+
return u.TakenDown
560
+
}
561
+
562
+
func (u *User) SetTombstoned(v bool) {
563
+
u.lk.Lock()
564
+
defer u.lk.Unlock()
565
+
u.Tombstoned = v
566
+
}
567
+
568
+
func (u *User) GetTombstoned() bool {
569
+
u.lk.Lock()
570
+
defer u.lk.Unlock()
571
+
return u.Tombstoned
572
+
}
573
+
574
+
func (u *User) SetUpstreamStatus(v string) {
575
+
u.lk.Lock()
576
+
defer u.lk.Unlock()
577
+
u.UpstreamStatus = v
578
+
}
579
+
580
+
func (u *User) GetUpstreamStatus() string {
581
+
u.lk.Lock()
582
+
defer u.lk.Unlock()
583
+
return u.UpstreamStatus
524
584
}
525
585
526
586
type addTargetBody struct {
···
547
607
548
608
var m = &dto.Metric{}
549
609
if err := c.EventsSent.Write(m); err != nil {
550
-
log.Errorf("failed to get sent counter: %s", err)
610
+
bgs.log.Error("failed to get sent counter", "err", err)
551
611
}
552
612
553
-
log.Infow("consumer disconnected",
613
+
bgs.log.Info("consumer disconnected",
554
614
"consumer_id", id,
555
615
"remote_addr", c.RemoteAddr,
556
616
"user_agent", c.UserAgent,
···
602
662
}
603
663
604
664
if err := conn.WriteControl(websocket.PingMessage, []byte{}, time.Now().Add(5*time.Second)); err != nil {
605
-
log.Warnf("failed to ping client: %s", err)
665
+
bgs.log.Warn("failed to ping client", "err", err)
606
666
cancel()
607
667
return
608
668
}
···
627
687
for {
628
688
_, _, err := conn.ReadMessage()
629
689
if err != nil {
630
-
log.Warnf("failed to read message from client: %s", err)
690
+
bgs.log.Warn("failed to read message from client", "err", err)
631
691
cancel()
632
692
return
633
693
}
···
654
714
consumerID := bgs.registerConsumer(&consumer)
655
715
defer bgs.cleanupConsumer(consumerID)
656
716
657
-
logger := log.With(
717
+
logger := bgs.log.With(
658
718
"consumer_id", consumerID,
659
719
"remote_addr", consumer.RemoteAddr,
660
720
"user_agent", consumer.UserAgent,
661
721
)
662
722
663
-
logger.Infow("new consumer", "cursor", since)
723
+
logger.Info("new consumer", "cursor", since)
664
724
665
725
for {
666
726
select {
···
672
732
673
733
wc, err := conn.NextWriter(websocket.BinaryMessage)
674
734
if err != nil {
675
-
logger.Errorf("failed to get next writer: %s", err)
735
+
logger.Error("failed to get next writer", "err", err)
676
736
return err
677
737
}
678
738
···
686
746
}
687
747
688
748
if err := wc.Close(); err != nil {
689
-
logger.Warnf("failed to flush-close our event write: %s", err)
749
+
logger.Warn("failed to flush-close our event write", "err", err)
690
750
return nil
691
751
}
692
752
···
707
767
// defensive in case things change under the hood.
708
768
registry, ok := promclient.DefaultRegisterer.(*promclient.Registry)
709
769
if !ok {
710
-
log.Warnf("failed to export default prometheus registry; some metrics will be unavailable; unexpected type: %T", promclient.DefaultRegisterer)
770
+
slog.Warn("failed to export default prometheus registry; some metrics will be unavailable; unexpected type", "type", reflect.TypeOf(promclient.DefaultRegisterer))
711
771
}
712
772
exporter, err := prometheus.NewExporter(prometheus.Options{
713
773
Registry: registry,
714
774
Namespace: "bigsky",
715
775
})
716
776
if err != nil {
717
-
log.Errorf("could not create the prometheus stats exporter: %v", err)
777
+
slog.Error("could not create the prometheus stats exporter", "err", err, "system", "bgs")
718
778
}
719
779
720
780
return exporter
···
771
831
ctx, span := tracer.Start(ctx, "lookupUserByDid")
772
832
defer span.End()
773
833
834
+
cu, ok := bgs.userCache.Get(did)
835
+
if ok {
836
+
return cu, nil
837
+
}
838
+
774
839
var u User
775
840
if err := bgs.db.Find(&u, "did = ?", did).Error; err != nil {
776
841
return nil, err
···
779
844
if u.ID == 0 {
780
845
return nil, gorm.ErrRecordNotFound
781
846
}
847
+
848
+
bgs.userCache.Add(did, &u)
782
849
783
850
return &u, nil
784
851
}
···
822
889
case env.RepoCommit != nil:
823
890
repoCommitsReceivedCounter.WithLabelValues(host.Host).Add(1)
824
891
evt := env.RepoCommit
825
-
log.Debugw("bgs got repo append event", "seq", evt.Seq, "pdsHost", host.Host, "repo", evt.Repo)
892
+
bgs.log.Debug("bgs got repo append event", "seq", evt.Seq, "pdsHost", host.Host, "repo", evt.Repo)
893
+
894
+
s := time.Now()
826
895
u, err := bgs.lookupUserByDid(ctx, evt.Repo)
896
+
userLookupDuration.Observe(time.Since(s).Seconds())
827
897
if err != nil {
828
898
if !errors.Is(err, gorm.ErrRecordNotFound) {
899
+
repoCommitsResultCounter.WithLabelValues(host.Host, "nou").Inc()
829
900
return fmt.Errorf("looking up event user: %w", err)
830
901
}
831
902
832
903
newUsersDiscovered.Inc()
904
+
start := time.Now()
833
905
subj, err := bgs.createExternalUser(ctx, evt.Repo)
906
+
newUserDiscoveryDuration.Observe(time.Since(start).Seconds())
834
907
if err != nil {
908
+
repoCommitsResultCounter.WithLabelValues(host.Host, "uerr").Inc()
835
909
return fmt.Errorf("fed event create external user: %w", err)
836
910
}
837
911
···
840
914
u.Did = evt.Repo
841
915
}
842
916
843
-
span.SetAttributes(attribute.String("upstream_status", u.UpstreamStatus))
917
+
ustatus := u.GetUpstreamStatus()
918
+
span.SetAttributes(attribute.String("upstream_status", ustatus))
844
919
845
-
if u.TakenDown || u.UpstreamStatus == events.AccountStatusTakendown {
846
-
span.SetAttributes(attribute.Bool("taken_down_by_relay_admin", u.TakenDown))
847
-
log.Debugw("dropping commit event from taken down user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host)
920
+
if u.GetTakenDown() || ustatus == events.AccountStatusTakendown {
921
+
span.SetAttributes(attribute.Bool("taken_down_by_relay_admin", u.GetTakenDown()))
922
+
bgs.log.Debug("dropping commit event from taken down user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host)
923
+
repoCommitsResultCounter.WithLabelValues(host.Host, "tdu").Inc()
848
924
return nil
849
925
}
850
926
851
-
if u.UpstreamStatus == events.AccountStatusSuspended {
852
-
log.Debugw("dropping commit event from suspended user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host)
927
+
if ustatus == events.AccountStatusSuspended {
928
+
bgs.log.Debug("dropping commit event from suspended user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host)
929
+
repoCommitsResultCounter.WithLabelValues(host.Host, "susu").Inc()
853
930
return nil
854
931
}
855
932
856
-
if u.UpstreamStatus == events.AccountStatusDeactivated {
857
-
log.Debugw("dropping commit event from deactivated user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host)
933
+
if ustatus == events.AccountStatusDeactivated {
934
+
bgs.log.Debug("dropping commit event from deactivated user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host)
935
+
repoCommitsResultCounter.WithLabelValues(host.Host, "du").Inc()
858
936
return nil
859
937
}
860
938
861
939
if evt.Rebase {
940
+
repoCommitsResultCounter.WithLabelValues(host.Host, "rebase").Inc()
862
941
return fmt.Errorf("rebase was true in event seq:%d,host:%s", evt.Seq, host.Host)
863
942
}
864
943
865
944
if host.ID != u.PDS && u.PDS != 0 {
866
-
log.Warnw("received event for repo from different pds than expected", "repo", evt.Repo, "expPds", u.PDS, "gotPds", host.Host)
945
+
bgs.log.Warn("received event for repo from different pds than expected", "repo", evt.Repo, "expPds", u.PDS, "gotPds", host.Host)
867
946
// Flush any cached DID documents for this user
868
947
bgs.didr.FlushCacheFor(env.RepoCommit.Repo)
869
948
870
949
subj, err := bgs.createExternalUser(ctx, evt.Repo)
871
950
if err != nil {
951
+
repoCommitsResultCounter.WithLabelValues(host.Host, "uerr2").Inc()
872
952
return err
873
953
}
874
954
875
955
if subj.PDS != host.ID {
956
+
repoCommitsResultCounter.WithLabelValues(host.Host, "noauth").Inc()
876
957
return fmt.Errorf("event from non-authoritative pds")
877
958
}
878
959
}
879
960
880
-
if u.Tombstoned {
961
+
if u.GetTombstoned() {
881
962
span.SetAttributes(attribute.Bool("tombstoned", true))
882
963
// we've checked the authority of the users PDS, so reinstate the account
883
964
if err := bgs.db.Model(&User{}).Where("id = ?", u.ID).UpdateColumn("tombstoned", false).Error; err != nil {
965
+
repoCommitsResultCounter.WithLabelValues(host.Host, "tomb").Inc()
884
966
return fmt.Errorf("failed to un-tombstone a user: %w", err)
885
967
}
968
+
u.SetTombstoned(false)
886
969
887
970
ai, err := bgs.Index.LookupUser(ctx, u.ID)
888
971
if err != nil {
972
+
repoCommitsResultCounter.WithLabelValues(host.Host, "nou2").Inc()
889
973
return fmt.Errorf("failed to look up user (tombstone recover): %w", err)
890
974
}
891
975
892
976
// Now a simple re-crawl should suffice to bring the user back online
977
+
repoCommitsResultCounter.WithLabelValues(host.Host, "catchupt").Inc()
893
978
return bgs.Index.Crawler.AddToCatchupQueue(ctx, host, ai, evt)
894
979
}
895
980
···
898
983
rebasesCounter.WithLabelValues(host.Host).Add(1)
899
984
ai, err := bgs.Index.LookupUser(ctx, u.ID)
900
985
if err != nil {
986
+
repoCommitsResultCounter.WithLabelValues(host.Host, "nou3").Inc()
901
987
return fmt.Errorf("failed to look up user (slow path): %w", err)
902
988
}
903
989
···
909
995
// processor coming off of the pds stream, we should investigate
910
996
// whether or not we even need this 'slow path' logic, as it makes
911
997
// accounting for which events have been processed much harder
998
+
repoCommitsResultCounter.WithLabelValues(host.Host, "catchup").Inc()
912
999
return bgs.Index.Crawler.AddToCatchupQueue(ctx, host, ai, evt)
913
1000
}
914
1001
915
1002
if err := bgs.repoman.HandleExternalUserEvent(ctx, host.ID, u.ID, u.Did, evt.Since, evt.Rev, evt.Blocks, evt.Ops); err != nil {
916
-
log.Warnw("failed handling event", "err", err, "pdsHost", host.Host, "seq", evt.Seq, "repo", u.Did, "prev", stringLink(evt.Prev), "commit", evt.Commit.String())
917
1003
918
1004
if errors.Is(err, carstore.ErrRepoBaseMismatch) || ipld.IsNotFound(err) {
919
1005
ai, lerr := bgs.Index.LookupUser(ctx, u.ID)
920
1006
if lerr != nil {
1007
+
log.Warn("failed handling event, no user", "err", err, "pdsHost", host.Host, "seq", evt.Seq, "repo", u.Did, "prev", stringLink(evt.Prev), "commit", evt.Commit.String())
1008
+
repoCommitsResultCounter.WithLabelValues(host.Host, "nou4").Inc()
921
1009
return fmt.Errorf("failed to look up user %s (%d) (err case: %s): %w", u.Did, u.ID, err, lerr)
922
1010
}
923
1011
924
1012
span.SetAttributes(attribute.Bool("catchup_queue", true))
925
1013
1014
+
log.Info("failed handling event, catchup", "err", err, "pdsHost", host.Host, "seq", evt.Seq, "repo", u.Did, "prev", stringLink(evt.Prev), "commit", evt.Commit.String())
1015
+
repoCommitsResultCounter.WithLabelValues(host.Host, "catchup2").Inc()
926
1016
return bgs.Index.Crawler.AddToCatchupQueue(ctx, host, ai, evt)
927
1017
}
928
1018
1019
+
log.Warn("failed handling event", "err", err, "pdsHost", host.Host, "seq", evt.Seq, "repo", u.Did, "prev", stringLink(evt.Prev), "commit", evt.Commit.String())
1020
+
repoCommitsResultCounter.WithLabelValues(host.Host, "err").Inc()
929
1021
return fmt.Errorf("handle user event failed: %w", err)
930
1022
}
931
1023
1024
+
repoCommitsResultCounter.WithLabelValues(host.Host, "ok").Inc()
932
1025
return nil
933
1026
case env.RepoHandle != nil:
934
-
log.Infow("bgs got repo handle event", "did", env.RepoHandle.Did, "handle", env.RepoHandle.Handle)
1027
+
bgs.log.Info("bgs got repo handle event", "did", env.RepoHandle.Did, "handle", env.RepoHandle.Handle)
935
1028
// Flush any cached DID documents for this user
936
1029
bgs.didr.FlushCacheFor(env.RepoHandle.Did)
937
1030
···
942
1035
}
943
1036
944
1037
if act.Handle.String != env.RepoHandle.Handle {
945
-
log.Warnw("handle update did not update handle to asserted value", "did", env.RepoHandle.Did, "expected", env.RepoHandle.Handle, "actual", act.Handle)
1038
+
bgs.log.Warn("handle update did not update handle to asserted value", "did", env.RepoHandle.Did, "expected", env.RepoHandle.Handle, "actual", act.Handle)
946
1039
}
947
1040
948
1041
// TODO: Update the ReposHandle event type to include "verified" or something
···
956
1049
},
957
1050
})
958
1051
if err != nil {
959
-
log.Errorw("failed to broadcast RepoHandle event", "error", err, "did", env.RepoHandle.Did, "handle", env.RepoHandle.Handle)
1052
+
bgs.log.Error("failed to broadcast RepoHandle event", "error", err, "did", env.RepoHandle.Did, "handle", env.RepoHandle.Handle)
960
1053
return fmt.Errorf("failed to broadcast RepoHandle event: %w", err)
961
1054
}
962
1055
963
1056
return nil
964
1057
case env.RepoIdentity != nil:
965
-
log.Infow("bgs got identity event", "did", env.RepoIdentity.Did)
1058
+
bgs.log.Info("bgs got identity event", "did", env.RepoIdentity.Did)
966
1059
// Flush any cached DID documents for this user
967
1060
bgs.didr.FlushCacheFor(env.RepoIdentity.Did)
968
1061
···
982
1075
},
983
1076
})
984
1077
if err != nil {
985
-
log.Errorw("failed to broadcast Identity event", "error", err, "did", env.RepoIdentity.Did)
1078
+
bgs.log.Error("failed to broadcast Identity event", "error", err, "did", env.RepoIdentity.Did)
986
1079
return fmt.Errorf("failed to broadcast Identity event: %w", err)
987
1080
}
988
1081
···
998
1091
span.SetAttributes(attribute.String("repo_status", *env.RepoAccount.Status))
999
1092
}
1000
1093
1001
-
log.Infow("bgs got account event", "did", env.RepoAccount.Did)
1094
+
bgs.log.Info("bgs got account event", "did", env.RepoAccount.Did)
1002
1095
// Flush any cached DID documents for this user
1003
1096
bgs.didr.FlushCacheFor(env.RepoAccount.Did)
1004
1097
···
1012
1105
// Check if the PDS is still authoritative
1013
1106
// if not we don't want to be propagating this account event
1014
1107
if ai.PDS != host.ID {
1015
-
log.Errorw("account event from non-authoritative pds",
1108
+
bgs.log.Error("account event from non-authoritative pds",
1016
1109
"seq", env.RepoAccount.Seq,
1017
1110
"did", env.RepoAccount.Did,
1018
1111
"event_from", host.Host,
···
1041
1134
return fmt.Errorf("failed to look up user by did: %w", err)
1042
1135
}
1043
1136
1044
-
if u.TakenDown {
1137
+
if u.GetTakenDown() {
1045
1138
shouldBeActive = false
1046
1139
status = &events.AccountStatusTakendown
1047
1140
}
···
1057
1150
},
1058
1151
})
1059
1152
if err != nil {
1060
-
log.Errorw("failed to broadcast Account event", "error", err, "did", env.RepoAccount.Did)
1153
+
bgs.log.Error("failed to broadcast Account event", "error", err, "did", env.RepoAccount.Did)
1061
1154
return fmt.Errorf("failed to broadcast Account event: %w", err)
1062
1155
}
1063
1156
···
1095
1188
}).Error; err != nil {
1096
1189
return err
1097
1190
}
1191
+
u.SetTombstoned(true)
1098
1192
1099
1193
if err := bgs.db.Model(&models.ActorInfo{}).Where("uid = ?", u.ID).UpdateColumns(map[string]any{
1100
1194
"handle": nil,
···
1105
1199
// delete data from carstore
1106
1200
if err := bgs.repoman.TakeDownRepo(ctx, u.ID); err != nil {
1107
1201
// don't let a failure here prevent us from propagating this event
1108
-
log.Errorf("failed to delete user data from carstore: %s", err)
1202
+
bgs.log.Error("failed to delete user data from carstore", "err", err)
1109
1203
}
1110
1204
1111
1205
return bgs.events.AddEvent(ctx, &events.XRPCStreamEvent{
···
1120
1214
1121
1215
externalUserCreationAttempts.Inc()
1122
1216
1123
-
log.Debugf("create external user: %s", did)
1217
+
s.log.Debug("create external user", "did", did)
1124
1218
doc, err := s.didr.GetDocument(ctx, did)
1125
1219
if err != nil {
1126
1220
return nil, fmt.Errorf("could not locate DID document for followed user (%s): %w", did, err)
···
1143
1237
// TODO: the PDS's DID should also be in the service, we could use that to look up?
1144
1238
var peering models.PDS
1145
1239
if err := s.db.Find(&peering, "host = ?", durl.Host).Error; err != nil {
1146
-
log.Error("failed to find pds", durl.Host)
1240
+
s.log.Error("failed to find pds", "host", durl.Host)
1147
1241
return nil, err
1148
1242
}
1149
1243
···
1216
1310
defer func() {
1217
1311
if !successfullyCreated {
1218
1312
if err := s.db.Model(&models.PDS{}).Where("id = ?", peering.ID).Update("repo_count", gorm.Expr("repo_count - 1")).Error; err != nil {
1219
-
log.Errorf("failed to decrement repo count for pds: %s", err)
1313
+
s.log.Error("failed to decrement repo count for pds", "err", err)
1220
1314
}
1221
1315
}
1222
1316
}()
···
1230
1324
return nil, err
1231
1325
}
1232
1326
1233
-
log.Debugw("creating external user", "did", did, "handle", hurl.Host, "pds", peering.ID)
1327
+
s.log.Debug("creating external user", "did", did, "handle", hurl.Host, "pds", peering.ID)
1234
1328
1235
1329
handle := hurl.Host
1236
1330
···
1238
1332
1239
1333
resdid, err := s.hr.ResolveHandleToDid(ctx, handle)
1240
1334
if err != nil {
1241
-
log.Errorf("failed to resolve users claimed handle (%q) on pds: %s", handle, err)
1335
+
s.log.Error("failed to resolve users claimed handle on pds", "handle", handle, "err", err)
1242
1336
validHandle = false
1243
1337
}
1244
1338
1245
1339
if resdid != did {
1246
-
log.Errorf("claimed handle did not match servers response (%s != %s)", resdid, did)
1340
+
s.log.Error("claimed handle did not match servers response", "resdid", resdid, "did", did)
1247
1341
validHandle = false
1248
1342
}
1249
1343
···
1252
1346
1253
1347
exu, err := s.Index.LookupUserByDid(ctx, did)
1254
1348
if err == nil {
1255
-
log.Debugw("lost the race to create a new user", "did", did, "handle", handle, "existing_hand", exu.Handle)
1349
+
s.log.Debug("lost the race to create a new user", "did", did, "handle", handle, "existing_hand", exu.Handle)
1256
1350
if exu.PDS != peering.ID {
1257
1351
// User is now on a different PDS, update
1258
1352
if err := s.db.Model(User{}).Where("id = ?", exu.Uid).Update("pds", peering.ID).Error; err != nil {
···
1323
1417
if err := s.db.Create(&u).Error; err != nil {
1324
1418
return nil, fmt.Errorf("failed to create user after handle conflict: %w", err)
1325
1419
}
1420
+
1421
+
s.userCache.Remove(did)
1326
1422
} else {
1327
1423
return nil, fmt.Errorf("failed to create other pds user: %w", err)
1328
1424
}
···
1370
1466
if err := bgs.db.Model(User{}).Where("id = ?", u.ID).Update("upstream_status", events.AccountStatusActive).Error; err != nil {
1371
1467
return fmt.Errorf("failed to set user active status: %w", err)
1372
1468
}
1469
+
u.SetUpstreamStatus(events.AccountStatusActive)
1373
1470
case events.AccountStatusDeactivated:
1374
1471
if err := bgs.db.Model(User{}).Where("id = ?", u.ID).Update("upstream_status", events.AccountStatusDeactivated).Error; err != nil {
1375
1472
return fmt.Errorf("failed to set user deactivation status: %w", err)
1376
1473
}
1474
+
u.SetUpstreamStatus(events.AccountStatusDeactivated)
1377
1475
case events.AccountStatusSuspended:
1378
1476
if err := bgs.db.Model(User{}).Where("id = ?", u.ID).Update("upstream_status", events.AccountStatusSuspended).Error; err != nil {
1379
1477
return fmt.Errorf("failed to set user suspension status: %w", err)
1380
1478
}
1479
+
u.SetUpstreamStatus(events.AccountStatusSuspended)
1381
1480
case events.AccountStatusTakendown:
1382
1481
if err := bgs.db.Model(User{}).Where("id = ?", u.ID).Update("upstream_status", events.AccountStatusTakendown).Error; err != nil {
1383
1482
return fmt.Errorf("failed to set user taken down status: %w", err)
1384
1483
}
1484
+
u.SetUpstreamStatus(events.AccountStatusTakendown)
1385
1485
1386
1486
if err := bgs.db.Model(&models.ActorInfo{}).Where("uid = ?", u.ID).UpdateColumns(map[string]any{
1387
1487
"handle": nil,
···
1396
1496
}).Error; err != nil {
1397
1497
return err
1398
1498
}
1499
+
u.SetUpstreamStatus(events.AccountStatusDeleted)
1399
1500
1400
1501
if err := bgs.db.Model(&models.ActorInfo{}).Where("uid = ?", u.ID).UpdateColumns(map[string]any{
1401
1502
"handle": nil,
···
1406
1507
// delete data from carstore
1407
1508
if err := bgs.repoman.TakeDownRepo(ctx, u.ID); err != nil {
1408
1509
// don't let a failure here prevent us from propagating this event
1409
-
log.Errorf("failed to delete user data from carstore: %s", err)
1510
+
bgs.log.Error("failed to delete user data from carstore", "err", err)
1410
1511
}
1411
1512
}
1412
1513
···
1422
1523
if err := bgs.db.Model(User{}).Where("id = ?", u.ID).Update("taken_down", true).Error; err != nil {
1423
1524
return err
1424
1525
}
1526
+
u.SetTakenDown(true)
1425
1527
1426
1528
if err := bgs.repoman.TakeDownRepo(ctx, u.ID); err != nil {
1427
1529
return err
···
1443
1545
if err := bgs.db.Model(User{}).Where("id = ?", u.ID).Update("taken_down", false).Error; err != nil {
1444
1546
return err
1445
1547
}
1548
+
u.SetTakenDown(false)
1446
1549
1447
1550
return nil
1448
1551
}
···
1511
1614
func (bgs *BGS) ResyncPDS(ctx context.Context, pds models.PDS) error {
1512
1615
ctx, span := tracer.Start(ctx, "ResyncPDS")
1513
1616
defer span.End()
1514
-
log := log.With("pds", pds.Host, "source", "resync_pds")
1617
+
log := bgs.log.With("pds", pds.Host, "source", "resync_pds")
1515
1618
resync, found := bgs.LoadOrStoreResync(pds)
1516
1619
if found {
1517
1620
return fmt.Errorf("resync already in progress")
···
1543
1646
for {
1544
1647
pages++
1545
1648
if pages%10 == 0 {
1546
-
log.Warnw("fetching PDS page during resync", "pages", pages, "total_repos", len(repos))
1649
+
log.Warn("fetching PDS page during resync", "pages", pages, "total_repos", len(repos))
1547
1650
resync.NumRepoPages = pages
1548
1651
resync.NumRepos = len(repos)
1549
1652
bgs.UpdateResync(resync)
1550
1653
}
1551
1654
if err := limiter.Wait(ctx); err != nil {
1552
-
log.Errorw("failed to wait for rate limiter", "error", err)
1655
+
log.Error("failed to wait for rate limiter", "error", err)
1553
1656
return fmt.Errorf("failed to wait for rate limiter: %w", err)
1554
1657
}
1555
1658
repoList, err := comatproto.SyncListRepos(ctx, &xrpcc, cursor, limit)
1556
1659
if err != nil {
1557
-
log.Errorw("failed to list repos", "error", err)
1660
+
log.Error("failed to list repos", "error", err)
1558
1661
return fmt.Errorf("failed to list repos: %w", err)
1559
1662
}
1560
1663
···
1576
1679
1577
1680
repolistDone := time.Now()
1578
1681
1579
-
log.Warnw("listed all repos, checking roots", "num_repos", len(repos), "took", repolistDone.Sub(start))
1682
+
log.Warn("listed all repos, checking roots", "num_repos", len(repos), "took", repolistDone.Sub(start))
1580
1683
resync = bgs.SetResyncStatus(pds.ID, "checking revs")
1581
1684
1582
-
// Create a buffered channel for collecting results
1583
-
results := make(chan revCheckResult, len(repos))
1685
+
// run loop over repos with some concurrency
1584
1686
sem := semaphore.NewWeighted(40)
1585
1687
1586
1688
// Check repo revs against our local copy and enqueue crawls for any that are out of date
1587
-
for _, r := range repos {
1689
+
for i, r := range repos {
1588
1690
if err := sem.Acquire(ctx, 1); err != nil {
1589
-
log.Errorw("failed to acquire semaphore", "error", err)
1590
-
results <- revCheckResult{err: err}
1691
+
log.Error("failed to acquire semaphore", "error", err)
1591
1692
continue
1592
1693
}
1593
1694
go func(r comatproto.SyncListRepos_Repo) {
1594
1695
defer sem.Release(1)
1595
-
log := log.With("did", r.Did, "remote_rev", r.Rev)
1696
+
log := bgs.log.With("did", r.Did, "remote_rev", r.Rev)
1596
1697
// Fetches the user if we have it, otherwise automatically enqueues it for crawling
1597
1698
ai, err := bgs.Index.GetUserOrMissing(ctx, r.Did)
1598
1699
if err != nil {
1599
-
log.Errorw("failed to get user while resyncing PDS, we can't recrawl it", "error", err)
1600
-
results <- revCheckResult{err: err}
1700
+
log.Error("failed to get user while resyncing PDS, we can't recrawl it", "error", err)
1601
1701
return
1602
1702
}
1603
1703
1604
1704
rev, err := bgs.repoman.GetRepoRev(ctx, ai.Uid)
1605
1705
if err != nil {
1606
-
log.Warnw("recrawling because we failed to get the local repo root", "err", err, "uid", ai.Uid)
1607
-
results <- revCheckResult{ai: ai}
1706
+
log.Warn("recrawling because we failed to get the local repo root", "err", err, "uid", ai.Uid)
1707
+
err := bgs.Index.Crawler.Crawl(ctx, ai)
1708
+
if err != nil {
1709
+
log.Error("failed to enqueue crawl for repo during resync", "error", err, "uid", ai.Uid, "did", ai.Did)
1710
+
}
1608
1711
return
1609
1712
}
1610
1713
1611
1714
if rev == "" || rev < r.Rev {
1612
-
log.Warnw("recrawling because the repo rev from the PDS is newer than our local repo rev", "local_rev", rev)
1613
-
results <- revCheckResult{ai: ai}
1715
+
log.Warn("recrawling because the repo rev from the PDS is newer than our local repo rev", "local_rev", rev)
1716
+
err := bgs.Index.Crawler.Crawl(ctx, ai)
1717
+
if err != nil {
1718
+
log.Error("failed to enqueue crawl for repo during resync", "error", err, "uid", ai.Uid, "did", ai.Did)
1719
+
}
1614
1720
return
1615
1721
}
1616
-
1617
-
results <- revCheckResult{}
1618
1722
}(r)
1619
-
}
1620
-
1621
-
var numReposToResync int
1622
-
for i := 0; i < len(repos); i++ {
1623
-
res := <-results
1624
-
if res.err != nil {
1625
-
log.Errorw("failed to process repo during resync", "error", res.err)
1626
-
1627
-
}
1628
-
if res.ai != nil {
1629
-
numReposToResync++
1630
-
err := bgs.Index.Crawler.Crawl(ctx, res.ai)
1631
-
if err != nil {
1632
-
log.Errorw("failed to enqueue crawl for repo during resync", "error", err, "uid", res.ai.Uid, "did", res.ai.Did)
1633
-
}
1634
-
}
1635
1723
if i%100 == 0 {
1636
1724
if i%10_000 == 0 {
1637
-
log.Warnw("checked revs during resync", "num_repos_checked", i, "num_repos_to_crawl", numReposToResync, "took", time.Now().Sub(resync.StatusChangedAt))
1725
+
log.Warn("checked revs during resync", "num_repos_checked", i, "num_repos_to_crawl", -1, "took", time.Now().Sub(resync.StatusChangedAt))
1638
1726
}
1639
1727
resync.NumReposChecked = i
1640
-
resync.NumReposToResync = numReposToResync
1641
1728
bgs.UpdateResync(resync)
1642
1729
}
1643
1730
}
1644
1731
1645
1732
resync.NumReposChecked = len(repos)
1646
-
resync.NumReposToResync = numReposToResync
1647
1733
bgs.UpdateResync(resync)
1648
1734
1649
-
log.Warnw("enqueued all crawls, exiting resync", "took", time.Now().Sub(start), "num_repos_to_crawl", numReposToResync)
1735
+
bgs.log.Warn("enqueued all crawls, exiting resync", "took", time.Now().Sub(start), "num_repos_to_crawl", -1)
1650
1736
1651
1737
return nil
1652
1738
}
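The resync loop above now bounds concurrency with a weighted semaphore and enqueues a crawl directly from each worker goroutine instead of collecting results on a buffered channel. A minimal, self-contained sketch of that bounded fan-out pattern, with a placeholder checkAndCrawl standing in for the real rev comparison and bgs.Index.Crawler.Crawl:

package main

import (
	"context"
	"fmt"

	"golang.org/x/sync/semaphore"
)

// checkAndCrawl is a stand-in for the per-repo rev check plus crawl enqueue.
func checkAndCrawl(ctx context.Context, did string) error {
	fmt.Println("would check and maybe crawl", did)
	return nil
}

func main() {
	ctx := context.Background()
	repos := []string{"did:plc:aaa", "did:plc:bbb", "did:plc:ccc"}

	// Cap in-flight checks, mirroring semaphore.NewWeighted(40) in ResyncPDS.
	const workers = 2
	sem := semaphore.NewWeighted(workers)
	for _, did := range repos {
		if err := sem.Acquire(ctx, 1); err != nil {
			break // Acquire only fails once ctx is done
		}
		go func(did string) {
			defer sem.Release(1)
			if err := checkAndCrawl(ctx, did); err != nil {
				fmt.Println("failed to enqueue crawl:", err)
			}
		}(did)
	}

	// Draining the full weight waits for the remaining workers.
	_ = sem.Acquire(ctx, workers)
}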
+7
-7
bgs/compactor.go
···
210
210
}
211
211
if c.requeueInterval > 0 {
212
212
go func() {
213
-
log.Infow("starting compactor requeue routine",
213
+
log.Info("starting compactor requeue routine",
214
214
"interval", c.requeueInterval,
215
215
"limit", c.requeueLimit,
216
216
"shardCount", c.requeueShardCount,
···
226
226
ctx := context.Background()
227
227
ctx, span := otel.Tracer("compactor").Start(ctx, "RequeueRoutine")
228
228
if err := c.EnqueueAllRepos(ctx, bgs, c.requeueLimit, c.requeueShardCount, c.requeueFast); err != nil {
229
-
log.Errorw("failed to enqueue all repos", "err", err)
229
+
log.Error("failed to enqueue all repos", "err", err)
230
230
}
231
231
span.End()
232
232
}
···
262
262
time.Sleep(time.Second * 5)
263
263
continue
264
264
}
265
-
log.Errorw("failed to compact repo",
265
+
log.Error("failed to compact repo",
266
266
"err", err,
267
267
"uid", state.latestUID,
268
268
"repo", state.latestDID,
···
273
273
// Pause for a bit to avoid spamming failed compactions
274
274
time.Sleep(time.Millisecond * 100)
275
275
} else {
276
-
log.Infow("compacted repo",
276
+
log.Info("compacted repo",
277
277
"uid", state.latestUID,
278
278
"repo", state.latestDID,
279
279
"status", state.status,
···
349
349
return state, nil
350
350
}
351
351
352
-
func (c *Compactor) EnqueueRepo(ctx context.Context, user User, fast bool) {
352
+
func (c *Compactor) EnqueueRepo(ctx context.Context, user *User, fast bool) {
353
353
ctx, span := otel.Tracer("compactor").Start(ctx, "EnqueueRepo")
354
354
defer span.End()
355
-
log.Infow("enqueueing compaction for repo", "repo", user.Did, "uid", user.ID, "fast", fast)
355
+
log.Info("enqueueing compaction for repo", "repo", user.Did, "uid", user.ID, "fast", fast)
356
356
c.q.Append(user.ID, fast)
357
357
}
358
358
···
396
396
c.q.Append(r.Usr, fast)
397
397
}
398
398
399
-
log.Infow("done enqueueing all repos", "repos_enqueued", len(repos))
399
+
log.Info("done enqueueing all repos", "repos_enqueued", len(repos))
400
400
401
401
return nil
402
402
}
+36
-23
bgs/fedmgr.go
···
4
4
"context"
5
5
"errors"
6
6
"fmt"
7
+
"log/slog"
7
8
"math/rand"
8
9
"strings"
9
10
"sync"
···
21
22
pq "github.com/lib/pq"
22
23
"gorm.io/gorm"
23
24
)
25
+
26
+
var log = slog.Default().With("system", "bgs")
24
27
25
28
type IndexCallback func(context.Context, *models.PDS, *events.XRPCStreamEvent) error
26
29
···
129
132
var errs []error
130
133
if errs = s.flushCursors(ctx); len(errs) > 0 {
131
134
for _, err := range errs {
132
-
log.Errorf("failed to flush cursors on shutdown: %s", err)
135
+
log.Error("failed to flush cursors on shutdown", "err", err)
133
136
}
134
137
}
135
138
log.Info("done flushing PDS cursors on shutdown")
···
142
145
defer span.End()
143
146
if errs := s.flushCursors(ctx); len(errs) > 0 {
144
147
for _, err := range errs {
145
-
log.Errorf("failed to flush cursors: %s", err)
148
+
log.Error("failed to flush cursors", "err", err)
146
149
}
147
150
}
148
151
log.Debug("done flushing PDS cursors")
···
210
213
errs := <-s.shutdownResult
211
214
if len(errs) > 0 {
212
215
for _, err := range errs {
213
-
log.Errorf("shutdown error: %s", err)
216
+
log.Error("shutdown error", "err", err)
214
217
}
215
218
}
216
219
log.Info("slurper shutdown complete")
···
468
471
protocol = "wss"
469
472
}
470
473
474
+
// Special case `.host.bsky.network` PDSs to rewind cursor by 200 events to smooth over unclean shutdowns
475
+
if strings.HasSuffix(host.Host, ".host.bsky.network") && host.Cursor > 200 {
476
+
host.Cursor -= 200
477
+
}
478
+
471
479
cursor := host.Cursor
480
+
481
+
connectedInbound.Inc()
482
+
defer connectedInbound.Dec()
483
+
// TODO:? maybe keep a gauge of 'in retry backoff' sources?
472
484
473
485
var backoff int
474
486
for {
···
481
493
url := fmt.Sprintf("%s://%s/xrpc/com.atproto.sync.subscribeRepos?cursor=%d", protocol, host.Host, cursor)
482
494
con, res, err := d.DialContext(ctx, url, nil)
483
495
if err != nil {
484
-
log.Warnw("dialing failed", "pdsHost", host.Host, "err", err, "backoff", backoff)
496
+
log.Warn("dialing failed", "pdsHost", host.Host, "err", err, "backoff", backoff)
485
497
time.Sleep(sleepForBackoff(backoff))
486
498
backoff++
487
499
488
500
if backoff > 15 {
489
-
log.Warnw("pds does not appear to be online, disabling for now", "pdsHost", host.Host)
501
+
log.Warn("pds does not appear to be online, disabling for now", "pdsHost", host.Host)
490
502
if err := s.db.Model(&models.PDS{}).Where("id = ?", host.ID).Update("registered", false).Error; err != nil {
491
-
log.Errorf("failed to unregister failing pds: %w", err)
503
+
log.Error("failed to unregister failing pds", "err", err)
492
504
}
493
505
494
506
return
···
497
509
continue
498
510
}
499
511
500
-
log.Info("event subscription response code: ", res.StatusCode)
512
+
log.Info("event subscription response", "code", res.StatusCode)
501
513
502
514
curCursor := cursor
503
515
if err := s.handleConnection(ctx, host, con, &cursor, sub); err != nil {
504
516
if errors.Is(err, ErrTimeoutShutdown) {
505
-
log.Infof("shutting down pds subscription to %s, no activity after %s", host.Host, EventsTimeout)
517
+
log.Info("shutting down pds subscription after timeout", "host", host.Host, "time", EventsTimeout)
506
518
return
507
519
}
508
-
log.Warnf("connection to %q failed: %s", host.Host, err)
520
+
log.Warn("connection to failed", "host", host.Host, "err", err)
509
521
}
510
522
511
523
if cursor > curCursor {
···
536
548
537
549
rsc := &events.RepoStreamCallbacks{
538
550
RepoCommit: func(evt *comatproto.SyncSubscribeRepos_Commit) error {
539
-
log.Debugw("got remote repo event", "pdsHost", host.Host, "repo", evt.Repo, "seq", evt.Seq)
551
+
log.Debug("got remote repo event", "pdsHost", host.Host, "repo", evt.Repo, "seq", evt.Seq)
540
552
if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{
541
553
RepoCommit: evt,
542
554
}); err != nil {
543
-
log.Errorf("failed handling event from %q (%d): %s", host.Host, evt.Seq, err)
555
+
log.Error("failed handling event", "host", host.Host, "seq", evt.Seq, "err", err)
544
556
}
545
557
*lastCursor = evt.Seq
546
558
···
551
563
return nil
552
564
},
553
565
RepoHandle: func(evt *comatproto.SyncSubscribeRepos_Handle) error {
554
-
log.Infow("got remote handle update event", "pdsHost", host.Host, "did", evt.Did, "handle", evt.Handle)
566
+
log.Info("got remote handle update event", "pdsHost", host.Host, "did", evt.Did, "handle", evt.Handle)
555
567
if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{
556
568
RepoHandle: evt,
557
569
}); err != nil {
558
-
log.Errorf("failed handling event from %q (%d): %s", host.Host, evt.Seq, err)
570
+
log.Error("failed handling event", "host", host.Host, "seq", evt.Seq, "err", err)
559
571
}
560
572
*lastCursor = evt.Seq
561
573
···
566
578
return nil
567
579
},
568
580
RepoMigrate: func(evt *comatproto.SyncSubscribeRepos_Migrate) error {
569
-
log.Infow("got remote repo migrate event", "pdsHost", host.Host, "did", evt.Did, "migrateTo", evt.MigrateTo)
581
+
log.Info("got remote repo migrate event", "pdsHost", host.Host, "did", evt.Did, "migrateTo", evt.MigrateTo)
570
582
if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{
571
583
RepoMigrate: evt,
572
584
}); err != nil {
573
-
log.Errorf("failed handling event from %q (%d): %s", host.Host, evt.Seq, err)
585
+
log.Error("failed handling event", "host", host.Host, "seq", evt.Seq, "err", err)
574
586
}
575
587
*lastCursor = evt.Seq
576
588
···
581
593
return nil
582
594
},
583
595
RepoTombstone: func(evt *comatproto.SyncSubscribeRepos_Tombstone) error {
584
-
log.Infow("got remote repo tombstone event", "pdsHost", host.Host, "did", evt.Did)
596
+
log.Info("got remote repo tombstone event", "pdsHost", host.Host, "did", evt.Did)
585
597
if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{
586
598
RepoTombstone: evt,
587
599
}); err != nil {
588
-
log.Errorf("failed handling event from %q (%d): %s", host.Host, evt.Seq, err)
600
+
log.Error("failed handling event", "host", host.Host, "seq", evt.Seq, "err", err)
589
601
}
590
602
*lastCursor = evt.Seq
591
603
···
596
608
return nil
597
609
},
598
610
RepoInfo: func(info *comatproto.SyncSubscribeRepos_Info) error {
599
-
log.Infow("info event", "name", info.Name, "message", info.Message, "pdsHost", host.Host)
611
+
log.Info("info event", "name", info.Name, "message", info.Message, "pdsHost", host.Host)
600
612
return nil
601
613
},
602
614
RepoIdentity: func(ident *comatproto.SyncSubscribeRepos_Identity) error {
603
-
log.Infow("identity event", "did", ident.Did)
615
+
log.Info("identity event", "did", ident.Did)
604
616
if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{
605
617
RepoIdentity: ident,
606
618
}); err != nil {
607
-
log.Errorf("failed handling event from %q (%d): %s", host.Host, ident.Seq, err)
619
+
log.Error("failed handling event", "host", host.Host, "seq", ident.Seq, "err", err)
608
620
}
609
621
*lastCursor = ident.Seq
610
622
···
615
627
return nil
616
628
},
617
629
RepoAccount: func(acct *comatproto.SyncSubscribeRepos_Account) error {
618
-
log.Infow("account event", "did", acct.Did, "status", acct.Status)
630
+
log.Info("account event", "did", acct.Did, "status", acct.Status)
619
631
if err := s.cb(context.TODO(), host, &events.XRPCStreamEvent{
620
632
RepoAccount: acct,
621
633
}); err != nil {
622
-
log.Errorf("failed handling event from %q (%d): %s", host.Host, acct.Seq, err)
634
+
log.Error("failed handling event", "host", host.Host, "seq", acct.Seq, "err", err)
623
635
}
624
636
*lastCursor = acct.Seq
625
637
···
662
674
con.RemoteAddr().String(),
663
675
instrumentedRSC.EventHandler,
664
676
)
665
-
return events.HandleRepoStream(ctx, con, pool)
677
+
return events.HandleRepoStream(ctx, con, pool, nil)
666
678
}
667
679
668
680
func (s *Slurper) updateCursor(sub *activeSub, curs int64) error {
···
733
745
return fmt.Errorf("killing connection %q: %w", host, ErrNoActiveConnection)
734
746
}
735
747
ac.cancel()
748
+
// cleanup happens in the run thread: subscribeWithRedialer() will delete(s.active, host)
736
749
737
750
if block {
738
751
if err := s.db.Model(models.PDS{}).Where("id = ?", ac.pds.ID).UpdateColumn("blocked", true).Error; err != nil {
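Throughout these files the zap-style sugared calls (Infow, Warnf, Errorw) are replaced with the standard library's log/slog, keyed off a package-level logger tagged with system=bgs. A small sketch of the same pattern; the message strings and key names below are illustrative only:

package main

import (
	"errors"
	"log/slog"
)

func main() {
	// Package-level logger with a "system" attribute, as fedmgr.go does above.
	log := slog.Default().With("system", "bgs")

	host := "pds.example.com" // hypothetical values
	err := errors.New("dial tcp: connection refused")

	// slog takes a constant message plus alternating key/value pairs,
	// instead of a printf-style format string.
	log.Warn("dialing failed", "pdsHost", host, "err", err, "backoff", 3)
	log.Error("failed to flush cursors", "err", err)

	// Derived loggers carry per-connection or per-task context.
	plog := log.With("pds", host, "source", "resync_pds")
	plog.Info("resync started")
}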
+52
-24
bgs/handlers.go
···
3
3
import (
4
4
"bytes"
5
5
"context"
6
+
"encoding/json"
6
7
"errors"
7
8
"fmt"
8
9
"io"
···
30
31
if errors.Is(err, gorm.ErrRecordNotFound) {
31
32
return nil, echo.NewHTTPError(http.StatusNotFound, "user not found")
32
33
}
33
-
log.Errorw("failed to lookup user", "err", err, "did", did)
34
+
log.Error("failed to lookup user", "err", err, "did", did)
34
35
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to lookup user")
35
36
}
36
37
37
-
if u.Tombstoned {
38
+
if u.GetTombstoned() {
38
39
return nil, fmt.Errorf("account was deleted")
39
40
}
40
41
41
-
if u.TakenDown {
42
+
if u.GetTakenDown() {
42
43
return nil, fmt.Errorf("account was taken down by the Relay")
43
44
}
44
45
45
-
if u.UpstreamStatus == events.AccountStatusTakendown {
46
+
ustatus := u.GetUpstreamStatus()
47
+
if ustatus == events.AccountStatusTakendown {
46
48
return nil, fmt.Errorf("account was taken down by its PDS")
47
49
}
48
50
49
-
if u.UpstreamStatus == events.AccountStatusDeactivated {
51
+
if ustatus == events.AccountStatusDeactivated {
50
52
return nil, fmt.Errorf("account is temporarily deactivated")
51
53
}
52
54
53
-
if u.UpstreamStatus == events.AccountStatusSuspended {
55
+
if ustatus == events.AccountStatusSuspended {
54
56
return nil, fmt.Errorf("account is suspended by its PDS")
55
57
}
56
58
···
59
61
if errors.Is(err, mst.ErrNotFound) {
60
62
return nil, echo.NewHTTPError(http.StatusNotFound, "record not found in repo")
61
63
}
62
-
log.Errorw("failed to get record from repo", "err", err, "did", did, "collection", collection, "rkey", rkey)
64
+
log.Error("failed to get record from repo", "err", err, "did", did, "collection", collection, "rkey", rkey)
63
65
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to get record from repo")
64
66
}
65
67
···
87
89
if errors.Is(err, gorm.ErrRecordNotFound) {
88
90
return nil, echo.NewHTTPError(http.StatusNotFound, "user not found")
89
91
}
90
-
log.Errorw("failed to lookup user", "err", err, "did", did)
92
+
log.Error("failed to lookup user", "err", err, "did", did)
91
93
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to lookup user")
92
94
}
93
95
94
-
if u.Tombstoned {
96
+
if u.GetTombstoned() {
95
97
return nil, fmt.Errorf("account was deleted")
96
98
}
97
99
98
-
if u.TakenDown {
100
+
if u.GetTakenDown() {
99
101
return nil, fmt.Errorf("account was taken down by the Relay")
100
102
}
101
103
102
-
if u.UpstreamStatus == events.AccountStatusTakendown {
104
+
ustatus := u.GetUpstreamStatus()
105
+
if ustatus == events.AccountStatusTakendown {
103
106
return nil, fmt.Errorf("account was taken down by its PDS")
104
107
}
105
108
106
-
if u.UpstreamStatus == events.AccountStatusDeactivated {
109
+
if ustatus == events.AccountStatusDeactivated {
107
110
return nil, fmt.Errorf("account is temporarily deactivated")
108
111
}
109
112
110
-
if u.UpstreamStatus == events.AccountStatusSuspended {
113
+
if ustatus == events.AccountStatusSuspended {
111
114
return nil, fmt.Errorf("account is suspended by its PDS")
112
115
}
113
116
114
117
// TODO: stream the response
115
118
buf := new(bytes.Buffer)
116
119
if err := s.repoman.ReadRepo(ctx, u.ID, since, buf); err != nil {
117
-
log.Errorw("failed to read repo into buffer", "err", err, "did", did)
120
+
log.Error("failed to read repo into buffer", "err", err, "did", did)
118
121
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to read repo into buffer")
119
122
}
120
123
···
167
170
return echo.NewHTTPError(http.StatusUnauthorized, "domain is banned")
168
171
}
169
172
170
-
log.Warnf("TODO: better host validation for crawl requests")
173
+
log.Warn("TODO: better host validation for crawl requests")
171
174
172
175
clientHost := fmt.Sprintf("%s://%s", u.Scheme, host)
173
176
···
185
188
// Maybe we could do something with this response later
186
189
_ = desc
187
190
191
+
if len(s.nextCrawlers) != 0 {
192
+
blob, err := json.Marshal(body)
193
+
if err != nil {
194
+
log.Warn("could not forward requestCrawl, json err", "err", err)
195
+
} else {
196
+
go func(bodyBlob []byte) {
197
+
for _, rpu := range s.nextCrawlers {
198
+
pu := rpu.JoinPath("/xrpc/com.atproto.sync.requestCrawl")
199
+
response, err := s.httpClient.Post(pu.String(), "application/json", bytes.NewReader(bodyBlob))
200
+
if response != nil && response.Body != nil {
201
+
response.Body.Close()
202
+
}
203
+
if err != nil || response == nil {
204
+
log.Warn("requestCrawl forward failed", "host", rpu, "err", err)
205
+
} else if response.StatusCode != http.StatusOK {
206
+
log.Warn("requestCrawl forward failed", "host", rpu, "status", response.Status)
207
+
} else {
208
+
log.Info("requestCrawl forward successful", "host", rpu)
209
+
}
210
+
}
211
+
}(blob)
212
+
}
213
+
}
214
+
188
215
return s.slurper.SubscribeToPds(ctx, host, true, false)
189
216
}
190
217
···
204
231
if err == gorm.ErrRecordNotFound {
205
232
return &comatprototypes.SyncListRepos_Output{}, nil
206
233
}
207
-
log.Errorw("failed to query users", "err", err)
234
+
log.Error("failed to query users", "err", err)
208
235
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to query users")
209
236
}
210
237
···
225
252
226
253
root, err := s.repoman.GetRepoRoot(ctx, user.ID)
227
254
if err != nil {
228
-
log.Errorw("failed to get repo root", "err", err, "did", user.Did)
255
+
log.Error("failed to get repo root", "err", err, "did", user.Did)
229
256
return nil, echo.NewHTTPError(http.StatusInternalServerError, fmt.Sprintf("failed to get repo root for (%s): %v", user.Did, err.Error()))
230
257
}
231
258
···
253
280
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to lookup user")
254
281
}
255
282
256
-
if u.Tombstoned {
283
+
if u.GetTombstoned() {
257
284
return nil, fmt.Errorf("account was deleted")
258
285
}
259
286
260
-
if u.TakenDown {
287
+
if u.GetTakenDown() {
261
288
return nil, fmt.Errorf("account was taken down by the Relay")
262
289
}
263
290
264
-
if u.UpstreamStatus == events.AccountStatusTakendown {
291
+
ustatus := u.GetUpstreamStatus()
292
+
if ustatus == events.AccountStatusTakendown {
265
293
return nil, fmt.Errorf("account was taken down by its PDS")
266
294
}
267
295
268
-
if u.UpstreamStatus == events.AccountStatusDeactivated {
296
+
if ustatus == events.AccountStatusDeactivated {
269
297
return nil, fmt.Errorf("account is temporarily deactivated")
270
298
}
271
299
272
-
if u.UpstreamStatus == events.AccountStatusSuspended {
300
+
if ustatus == events.AccountStatusSuspended {
273
301
return nil, fmt.Errorf("account is suspended by its PDS")
274
302
}
275
303
276
304
root, err := s.repoman.GetRepoRoot(ctx, u.ID)
277
305
if err != nil {
278
-
log.Errorw("failed to get repo root", "err", err, "did", u.Did)
306
+
log.Error("failed to get repo root", "err", err, "did", u.Did)
279
307
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to get repo root")
280
308
}
281
309
282
310
rev, err := s.repoman.GetRepoRev(ctx, u.ID)
283
311
if err != nil {
284
-
log.Errorw("failed to get repo rev", "err", err, "did", u.Did)
312
+
log.Error("failed to get repo rev", "err", err, "did", u.Did)
285
313
return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to get repo rev")
286
314
}
287
315
+22
bgs/metrics.go
···
27
27
Help: "The total number of events received",
28
28
}, []string{"pds"})
29
29
30
+
var repoCommitsResultCounter = promauto.NewCounterVec(prometheus.CounterOpts{
31
+
Name: "repo_commits_result_counter",
32
+
Help: "The results of commit events received",
33
+
}, []string{"pds", "status"})
34
+
30
35
var rebasesCounter = promauto.NewCounterVec(prometheus.CounterOpts{
31
36
Name: "event_rebases",
32
37
Help: "The total number of rebase events received",
···
40
45
var externalUserCreationAttempts = promauto.NewCounter(prometheus.CounterOpts{
41
46
Name: "bgs_external_user_creation_attempts",
42
47
Help: "The total number of external users created",
48
+
})
49
+
50
+
var connectedInbound = promauto.NewGauge(prometheus.GaugeOpts{
51
+
Name: "bgs_connected_inbound",
52
+
Help: "Number of inbound firehoses we are consuming",
43
53
})
44
54
45
55
var compactionDuration = promauto.NewHistogram(prometheus.HistogramOpts{
···
80
90
Help: "A histogram of response sizes for requests.",
81
91
Buckets: prometheus.ExponentialBuckets(100, 10, 8),
82
92
}, []string{"code", "method", "path"})
93
+
94
+
var userLookupDuration = promauto.NewHistogram(prometheus.HistogramOpts{
95
+
Name: "relay_user_lookup_duration",
96
+
Help: "A histogram of user lookup latencies",
97
+
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
98
+
})
99
+
100
+
var newUserDiscoveryDuration = promauto.NewHistogram(prometheus.HistogramOpts{
101
+
Name: "relay_new_user_discovery_duration",
102
+
Help: "A histogram of new user discovery latencies",
103
+
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
104
+
})
83
105
84
106
// MetricsMiddleware defines handler function for metrics middleware
85
107
func MetricsMiddleware(next echo.HandlerFunc) echo.HandlerFunc {
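The new relay_user_lookup_duration and relay_new_user_discovery_duration histograms are presumably observed around the corresponding lookup paths; a hedged sketch of the usual promauto timing pattern (the metric name and lookup function here are made up):

package main

import (
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

// Illustrative histogram; buckets mirror ExponentialBuckets(0.001, 2, 15) above.
var lookupDuration = promauto.NewHistogram(prometheus.HistogramOpts{
	Name:    "example_user_lookup_duration",
	Help:    "A histogram of user lookup latencies",
	Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
})

func lookupUser(did string) {
	start := time.Now()
	// Record the elapsed time in seconds when the lookup returns.
	defer func() { lookupDuration.Observe(time.Since(start).Seconds()) }()

	time.Sleep(5 * time.Millisecond) // stand-in for the real DB or cache lookup
	_ = did
}

func main() {
	lookupUser("did:plc:example")
}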
+82
-26
carstore/bs.go
···
6
6
"context"
7
7
"fmt"
8
8
"io"
9
+
"log/slog"
9
10
"os"
10
11
"path/filepath"
11
12
"sort"
···
24
25
cbor "github.com/ipfs/go-ipld-cbor"
25
26
ipld "github.com/ipfs/go-ipld-format"
26
27
"github.com/ipfs/go-libipfs/blocks"
27
-
logging "github.com/ipfs/go-log"
28
28
car "github.com/ipld/go-car"
29
29
carutil "github.com/ipld/go-car/util"
30
30
cbg "github.com/whyrusleeping/cbor-gen"
···
41
41
var blockGetTotalCounterUsrskip = blockGetTotalCounter.WithLabelValues("true", "miss")
42
42
var blockGetTotalCounterCached = blockGetTotalCounter.WithLabelValues("false", "hit")
43
43
var blockGetTotalCounterNormal = blockGetTotalCounter.WithLabelValues("false", "miss")
44
-
45
-
var log = logging.Logger("carstore")
46
44
47
45
const MaxSliceLength = 2 << 20
48
46
···
62
60
}
63
61
64
62
type FileCarStore struct {
65
-
meta *CarStoreGormMeta
66
-
rootDir string
63
+
meta *CarStoreGormMeta
64
+
rootDirs []string
67
65
68
66
lscLk sync.Mutex
69
67
lastShardCache map[models.Uid]*CarShard
68
+
69
+
log *slog.Logger
70
70
}
71
71
72
-
func NewCarStore(meta *gorm.DB, root string) (CarStore, error) {
73
-
if _, err := os.Stat(root); err != nil {
74
-
if !os.IsNotExist(err) {
75
-
return nil, err
76
-
}
72
+
func NewCarStore(meta *gorm.DB, roots []string) (CarStore, error) {
73
+
for _, root := range roots {
74
+
if _, err := os.Stat(root); err != nil {
75
+
if !os.IsNotExist(err) {
76
+
return nil, err
77
+
}
77
78
78
-
if err := os.Mkdir(root, 0775); err != nil {
79
-
return nil, err
79
+
if err := os.Mkdir(root, 0775); err != nil {
80
+
return nil, err
81
+
}
80
82
}
81
83
}
82
84
if err := meta.AutoMigrate(&CarShard{}, &blockRef{}); err != nil {
···
88
90
89
91
return &FileCarStore{
90
92
meta: &CarStoreGormMeta{meta: meta},
91
-
rootDir: root,
93
+
rootDirs: roots,
92
94
lastShardCache: make(map[models.Uid]*CarShard),
95
+
log: slog.Default().With("system", "carstore"),
93
96
}, nil
94
97
}
95
98
96
99
type userView struct {
97
-
cs *FileCarStore
100
+
cs CarStore
98
101
user models.Uid
99
102
100
103
cache map[cid.Cid]blockformat.Block
···
108
111
}
109
112
110
113
func (uv *userView) Has(ctx context.Context, k cid.Cid) (bool, error) {
111
-
return uv.cs.meta.HasUidCid(ctx, uv.user, k)
114
+
_, have := uv.cache[k]
115
+
if have {
116
+
return have, nil
117
+
}
118
+
119
+
fcd, ok := uv.cs.(*FileCarStore)
120
+
if !ok {
121
+
return false, nil
122
+
}
123
+
124
+
return fcd.meta.HasUidCid(ctx, uv.user, k)
112
125
}
113
126
114
127
var CacheHits int64
115
128
var CacheMiss int64
116
129
117
130
func (uv *userView) Get(ctx context.Context, k cid.Cid) (blockformat.Block, error) {
131
+
118
132
if !k.Defined() {
119
133
return nil, fmt.Errorf("attempted to 'get' undefined cid")
120
134
}
···
129
143
}
130
144
atomic.AddInt64(&CacheMiss, 1)
131
145
132
-
path, offset, user, err := uv.cs.meta.LookupBlockRef(ctx, k)
146
+
fcd, ok := uv.cs.(*FileCarStore)
147
+
if !ok {
148
+
return nil, ipld.ErrNotFound{Cid: k}
149
+
}
150
+
151
+
path, offset, user, err := fcd.meta.LookupBlockRef(ctx, k)
133
152
if err != nil {
134
153
return nil, err
135
154
}
···
269
288
baseCid cid.Cid
270
289
seq int
271
290
readonly bool
272
-
cs *FileCarStore
291
+
cs CarStore
273
292
lastRev string
274
293
}
275
294
···
541
560
func fnameForShard(user models.Uid, seq int) string {
542
561
return fmt.Sprintf("sh-%d-%d", user, seq)
543
562
}
563
+
564
+
func (cs *FileCarStore) dirForUser(user models.Uid) string {
565
+
return cs.rootDirs[int(user)%len(cs.rootDirs)]
566
+
}
567
+
544
568
func (cs *FileCarStore) openNewShardFile(ctx context.Context, user models.Uid, seq int) (*os.File, string, error) {
545
569
// TODO: some overwrite protections
546
-
fname := filepath.Join(cs.rootDir, fnameForShard(user, seq))
570
+
fname := filepath.Join(cs.dirForUser(user), fnameForShard(user, seq))
547
571
fi, err := os.Create(fname)
548
572
if err != nil {
549
573
return nil, "", err
···
557
581
defer span.End()
558
582
559
583
// TODO: some overwrite protections
560
-
fname := filepath.Join(cs.rootDir, fnameForShard(user, seq))
584
+
fname := filepath.Join(cs.dirForUser(user), fnameForShard(user, seq))
561
585
if err := os.WriteFile(fname, data, 0664); err != nil {
562
586
return "", err
563
587
}
···
579
603
return nil, fmt.Errorf("cannot write to readonly deltaSession")
580
604
}
581
605
582
-
return ds.cs.writeNewShard(ctx, root, rev, ds.user, ds.seq, ds.blks, ds.rmcids)
606
+
switch ocs := ds.cs.(type) {
607
+
case *FileCarStore:
608
+
return ocs.writeNewShard(ctx, root, rev, ds.user, ds.seq, ds.blks, ds.rmcids)
609
+
case *NonArchivalCarstore:
610
+
slice, err := blocksToCar(ctx, root, rev, ds.blks)
611
+
if err != nil {
612
+
return nil, err
613
+
}
614
+
return slice, ocs.updateLastCommit(ctx, ds.user, rev, root)
615
+
default:
616
+
return nil, fmt.Errorf("unsupported carstore type")
617
+
}
583
618
}
584
619
585
620
func WriteCarHeader(w io.Writer, root cid.Cid) (int64, error) {
···
600
635
return hnw, nil
601
636
}
602
637
638
+
func blocksToCar(ctx context.Context, root cid.Cid, rev string, blks map[cid.Cid]blockformat.Block) ([]byte, error) {
639
+
buf := new(bytes.Buffer)
640
+
_, err := WriteCarHeader(buf, root)
641
+
if err != nil {
642
+
return nil, fmt.Errorf("failed to write car header: %w", err)
643
+
}
644
+
645
+
for k, blk := range blks {
646
+
_, err := LdWrite(buf, k.Bytes(), blk.RawData())
647
+
if err != nil {
648
+
return nil, fmt.Errorf("failed to write block: %w", err)
649
+
}
650
+
}
651
+
652
+
return buf.Bytes(), nil
653
+
}
654
+
603
655
func (cs *FileCarStore) writeNewShard(ctx context.Context, root cid.Cid, rev string, user models.Uid, seq int, blks map[cid.Cid]blockformat.Block, rmcids map[cid.Cid]bool) ([]byte, error) {
604
656
605
657
buf := new(bytes.Buffer)
···
638
690
offset += nw
639
691
}
640
692
693
+
start := time.Now()
641
694
path, err := cs.writeNewShardFile(ctx, user, seq, buf.Bytes())
642
695
if err != nil {
643
696
return nil, fmt.Errorf("failed to write shard file: %w", err)
644
697
}
698
+
writeShardFileDuration.Observe(time.Since(start).Seconds())
645
699
646
700
shard := CarShard{
647
701
Root: models.DbCID{CID: root},
···
652
706
Rev: rev,
653
707
}
654
708
709
+
start = time.Now()
655
710
if err := cs.putShard(ctx, &shard, brefs, rmcids, false); err != nil {
656
711
return nil, err
657
712
}
713
+
writeShardMetadataDuration.Observe(time.Since(start).Seconds())
658
714
659
715
return buf.Bytes(), nil
660
716
}
···
872
928
if !os.IsNotExist(err) {
873
929
return err
874
930
}
875
-
log.Warnw("shard file we tried to delete did not exist", "shard", sh.ID, "path", sh.Path)
931
+
cs.log.Warn("shard file we tried to delete did not exist", "shard", sh.ID, "path", sh.Path)
876
932
}
877
933
}
878
934
···
982
1038
// TODO: some overwrite protections
983
1039
// NOTE CreateTemp is used for creating a non-colliding file, but we keep it and don't delete it so don't think of it as "temporary".
984
1040
// This creates "sh-%d-%d%s" with some random stuff in the last position
985
-
fi, err := os.CreateTemp(cs.rootDir, fnameForShard(user, seq))
1041
+
fi, err := os.CreateTemp(cs.dirForUser(user), fnameForShard(user, seq))
986
1042
if err != nil {
987
1043
return nil, "", err
988
1044
}
···
1023
1079
st, err := os.Stat(sh.Path)
1024
1080
if err != nil {
1025
1081
if os.IsNotExist(err) {
1026
-
log.Warnw("missing shard, return size of zero", "path", sh.Path, "shard", sh.ID)
1082
+
slog.Warn("missing shard, return size of zero", "path", sh.Path, "shard", sh.ID, "system", "carstore")
1027
1083
return 0, nil
1028
1084
}
1029
1085
return 0, fmt.Errorf("stat %q: %w", sh.Path, err)
···
1144
1200
// still around but we're doing that anyways since compaction isn't a
1145
1201
// perfect process
1146
1202
1147
-
log.Debugw("repo has dirty dupes", "count", len(dupes), "uid", user, "staleRefs", len(staleRefs), "blockRefs", len(brefs))
1203
+
cs.log.Debug("repo has dirty dupes", "count", len(dupes), "uid", user, "staleRefs", len(staleRefs), "blockRefs", len(brefs))
1148
1204
1149
1205
//return nil, fmt.Errorf("WIP: not currently handling this case")
1150
1206
}
···
1339
1395
}); err != nil {
1340
1396
// If we ever fail to iterate a shard file because its
1341
1397
// corrupted, just log an error and skip the shard
1342
-
log.Errorw("iterating blocks in shard", "shard", s.ID, "err", err, "uid", user)
1398
+
cs.log.Error("iterating blocks in shard", "shard", s.ID, "err", err, "uid", user)
1343
1399
}
1344
1400
}
1345
1401
···
1357
1413
_ = fi.Close()
1358
1414
1359
1415
if err2 := os.Remove(fi.Name()); err2 != nil {
1360
-
log.Errorf("failed to remove shard file (%s) after failed db transaction: %w", fi.Name(), err2)
1416
+
cs.log.Error("failed to remove shard file after failed db transaction", "path", fi.Name(), "err", err2)
1361
1417
}
1362
1418
1363
1419
return err
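NewCarStore now takes a slice of root directories and FileCarStore spreads shard files across them with dirForUser, i.e. rootDirs[int(uid) % len(rootDirs)]. A tiny standalone sketch of that placement rule (directory names are made up):

package main

import (
	"fmt"
	"path/filepath"
)

type Uid uint64 // stand-in for models.Uid

// dirForUser mirrors the modulo placement used by FileCarStore above.
func dirForUser(rootDirs []string, user Uid) string {
	return rootDirs[int(user)%len(rootDirs)]
}

func main() {
	roots := []string{"/data/shards1", "/data/shards2"} // hypothetical mounts

	for _, uid := range []Uid{1, 2, 3, 4} {
		fname := fmt.Sprintf("sh-%d-%d", uid, 0) // same naming as fnameForShard
		fmt.Println(filepath.Join(dirForUser(roots, uid), fname))
	}
}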
+18
carstore/metrics.go
···
1
+
package carstore
2
+
3
+
import (
4
+
"github.com/prometheus/client_golang/prometheus"
5
+
"github.com/prometheus/client_golang/prometheus/promauto"
6
+
)
7
+
8
+
var writeShardFileDuration = promauto.NewHistogram(prometheus.HistogramOpts{
9
+
Name: "carstore_write_shard_file_duration",
10
+
Help: "Duration of writing shard file to disk",
11
+
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
12
+
})
13
+
14
+
var writeShardMetadataDuration = promauto.NewHistogram(prometheus.HistogramOpts{
15
+
Name: "carstore_write_shard_metadata_duration",
16
+
Help: "Duration of writing shard metadata to DB",
17
+
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
18
+
})
+254
carstore/nonarchive.go
···
1
+
package carstore
2
+
3
+
import (
4
+
"bytes"
5
+
"context"
6
+
"fmt"
7
+
"io"
8
+
"log/slog"
9
+
"sync"
10
+
11
+
"github.com/bluesky-social/indigo/models"
12
+
blockformat "github.com/ipfs/go-block-format"
13
+
"github.com/ipfs/go-cid"
14
+
"github.com/ipfs/go-datastore"
15
+
blockstore "github.com/ipfs/go-ipfs-blockstore"
16
+
car "github.com/ipld/go-car"
17
+
"go.opentelemetry.io/otel"
18
+
"gorm.io/gorm"
19
+
"gorm.io/gorm/clause"
20
+
)
21
+
22
+
type NonArchivalCarstore struct {
23
+
db *gorm.DB
24
+
25
+
lk sync.Mutex
26
+
lastCommitCache map[models.Uid]*commitRefInfo
27
+
28
+
log *slog.Logger
29
+
}
30
+
31
+
func NewNonArchivalCarstore(db *gorm.DB) (*NonArchivalCarstore, error) {
32
+
if err := db.AutoMigrate(&commitRefInfo{}); err != nil {
33
+
return nil, err
34
+
}
35
+
36
+
return &NonArchivalCarstore{
37
+
db: db,
38
+
lastCommitCache: make(map[models.Uid]*commitRefInfo),
39
+
log: slog.Default().With("system", "carstorena"),
40
+
}, nil
41
+
}
42
+
43
+
type commitRefInfo struct {
44
+
ID uint `gorm:"primarykey"`
45
+
Uid models.Uid `gorm:"uniqueIndex"`
46
+
Rev string
47
+
Root models.DbCID
48
+
}
49
+
50
+
func (cs *NonArchivalCarstore) checkLastShardCache(user models.Uid) *commitRefInfo {
51
+
cs.lk.Lock()
52
+
defer cs.lk.Unlock()
53
+
54
+
ls, ok := cs.lastCommitCache[user]
55
+
if ok {
56
+
return ls
57
+
}
58
+
59
+
return nil
60
+
}
61
+
62
+
func (cs *NonArchivalCarstore) removeLastShardCache(user models.Uid) {
63
+
cs.lk.Lock()
64
+
defer cs.lk.Unlock()
65
+
66
+
delete(cs.lastCommitCache, user)
67
+
}
68
+
69
+
func (cs *NonArchivalCarstore) putLastShardCache(ls *commitRefInfo) {
70
+
cs.lk.Lock()
71
+
defer cs.lk.Unlock()
72
+
73
+
cs.lastCommitCache[ls.Uid] = ls
74
+
}
75
+
76
+
func (cs *NonArchivalCarstore) loadCommitRefInfo(ctx context.Context, user models.Uid) (*commitRefInfo, error) {
77
+
var out commitRefInfo
78
+
if err := cs.db.Find(&out, "uid = ?", user).Error; err != nil {
79
+
return nil, err
80
+
}
81
+
82
+
return &out, nil
83
+
}
84
+
85
+
func (cs *NonArchivalCarstore) getCommitRefInfo(ctx context.Context, user models.Uid) (*commitRefInfo, error) {
86
+
ctx, span := otel.Tracer("carstore").Start(ctx, "getCommitRefInfo")
87
+
defer span.End()
88
+
89
+
maybeLs := cs.checkLastShardCache(user)
90
+
if maybeLs != nil {
91
+
return maybeLs, nil
92
+
}
93
+
94
+
lastShard, err := cs.loadCommitRefInfo(ctx, user)
95
+
if err != nil {
96
+
return nil, err
97
+
}
98
+
99
+
cs.putLastShardCache(lastShard)
100
+
return lastShard, nil
101
+
}
102
+
103
+
func (cs *NonArchivalCarstore) updateLastCommit(ctx context.Context, uid models.Uid, rev string, cid cid.Cid) error {
104
+
cri := &commitRefInfo{
105
+
Uid: uid,
106
+
Rev: rev,
107
+
Root: models.DbCID{CID: cid},
108
+
}
109
+
110
+
if err := cs.db.Clauses(clause.OnConflict{
111
+
Columns: []clause.Column{{Name: "uid"}},
112
+
UpdateAll: true,
113
+
}).Create(cri).Error; err != nil {
114
+
return fmt.Errorf("update or set last commit info: %w", err)
115
+
}
116
+
117
+
cs.putLastShardCache(cri)
118
+
119
+
return nil
120
+
}
121
+
122
+
func (cs *NonArchivalCarstore) NewDeltaSession(ctx context.Context, user models.Uid, since *string) (*DeltaSession, error) {
123
+
ctx, span := otel.Tracer("carstore").Start(ctx, "NewSession")
124
+
defer span.End()
125
+
126
+
// TODO: ensure that we don't write updates on top of the wrong head
127
+
// this needs to be a compare and swap type operation
128
+
lastShard, err := cs.getCommitRefInfo(ctx, user)
129
+
if err != nil {
130
+
return nil, err
131
+
}
132
+
133
+
if since != nil && *since != lastShard.Rev {
134
+
cs.log.Warn("revision mismatch", "commitSince", since, "lastRev", lastShard.Rev, "err", ErrRepoBaseMismatch)
135
+
}
136
+
137
+
return &DeltaSession{
138
+
fresh: blockstore.NewBlockstore(datastore.NewMapDatastore()),
139
+
blks: make(map[cid.Cid]blockformat.Block),
140
+
base: &userView{
141
+
user: user,
142
+
cs: cs,
143
+
prefetch: true,
144
+
cache: make(map[cid.Cid]blockformat.Block),
145
+
},
146
+
user: user,
147
+
baseCid: lastShard.Root.CID,
148
+
cs: cs,
149
+
seq: 0,
150
+
lastRev: lastShard.Rev,
151
+
}, nil
152
+
}
153
+
154
+
func (cs *NonArchivalCarstore) ReadOnlySession(user models.Uid) (*DeltaSession, error) {
155
+
return &DeltaSession{
156
+
base: &userView{
157
+
user: user,
158
+
cs: cs,
159
+
prefetch: false,
160
+
cache: make(map[cid.Cid]blockformat.Block),
161
+
},
162
+
readonly: true,
163
+
user: user,
164
+
cs: cs,
165
+
}, nil
166
+
}
167
+
168
+
// TODO: incremental is only ever called true, remove the param
169
+
func (cs *NonArchivalCarstore) ReadUserCar(ctx context.Context, user models.Uid, sinceRev string, incremental bool, w io.Writer) error {
170
+
return fmt.Errorf("not supported in non-archival mode")
171
+
}
172
+
173
+
func (cs *NonArchivalCarstore) ImportSlice(ctx context.Context, uid models.Uid, since *string, carslice []byte) (cid.Cid, *DeltaSession, error) {
174
+
ctx, span := otel.Tracer("carstore").Start(ctx, "ImportSlice")
175
+
defer span.End()
176
+
177
+
carr, err := car.NewCarReader(bytes.NewReader(carslice))
178
+
if err != nil {
179
+
return cid.Undef, nil, err
180
+
}
181
+
182
+
if len(carr.Header.Roots) != 1 {
183
+
return cid.Undef, nil, fmt.Errorf("invalid car file, header must have a single root (has %d)", len(carr.Header.Roots))
184
+
}
185
+
186
+
ds, err := cs.NewDeltaSession(ctx, uid, since)
187
+
if err != nil {
188
+
return cid.Undef, nil, fmt.Errorf("new delta session failed: %w", err)
189
+
}
190
+
191
+
var cids []cid.Cid
192
+
for {
193
+
blk, err := carr.Next()
194
+
if err != nil {
195
+
if err == io.EOF {
196
+
break
197
+
}
198
+
return cid.Undef, nil, err
199
+
}
200
+
201
+
cids = append(cids, blk.Cid())
202
+
203
+
if err := ds.Put(ctx, blk); err != nil {
204
+
return cid.Undef, nil, err
205
+
}
206
+
}
207
+
208
+
return carr.Header.Roots[0], ds, nil
209
+
}
210
+
211
+
func (cs *NonArchivalCarstore) GetUserRepoHead(ctx context.Context, user models.Uid) (cid.Cid, error) {
212
+
lastShard, err := cs.getCommitRefInfo(ctx, user)
213
+
if err != nil {
214
+
return cid.Undef, err
215
+
}
216
+
if lastShard.ID == 0 {
217
+
return cid.Undef, nil
218
+
}
219
+
220
+
return lastShard.Root.CID, nil
221
+
}
222
+
223
+
func (cs *NonArchivalCarstore) GetUserRepoRev(ctx context.Context, user models.Uid) (string, error) {
224
+
lastShard, err := cs.getCommitRefInfo(ctx, user)
225
+
if err != nil {
226
+
return "", err
227
+
}
228
+
if lastShard.ID == 0 {
229
+
return "", nil
230
+
}
231
+
232
+
return lastShard.Rev, nil
233
+
}
234
+
235
+
func (cs *NonArchivalCarstore) Stat(ctx context.Context, usr models.Uid) ([]UserStat, error) {
236
+
return nil, nil
237
+
}
238
+
239
+
func (cs *NonArchivalCarstore) WipeUserData(ctx context.Context, user models.Uid) error {
240
+
if err := cs.db.Raw("DELETE from commit_ref_infos WHERE uid = ?", user).Error; err != nil {
241
+
return err
242
+
}
243
+
244
+
cs.removeLastShardCache(user)
245
+
return nil
246
+
}
247
+
248
+
func (cs *NonArchivalCarstore) GetCompactionTargets(ctx context.Context, shardCount int) ([]CompactionTarget, error) {
249
+
return nil, fmt.Errorf("compaction not supported on non-archival")
250
+
}
251
+
252
+
func (cs *NonArchivalCarstore) CompactUserShards(ctx context.Context, user models.Uid, skipBigShards bool) (*CompactionStats, error) {
253
+
return nil, fmt.Errorf("compaction not supported in non-archival")
254
+
}
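A rough usage sketch for the non-archival carstore added above, wired to an in-memory SQLite gorm database (the sqlite driver and DSN are assumptions for illustration, not part of this change); it tracks only the latest commit rev and root per user and rejects full CAR exports:

package main

import (
	"context"
	"fmt"

	"github.com/bluesky-social/indigo/carstore"
	"github.com/bluesky-social/indigo/models"
	"gorm.io/driver/sqlite"
	"gorm.io/gorm"
)

func main() {
	ctx := context.Background()

	// Assumed test wiring: in-memory SQLite via gorm.
	db, err := gorm.Open(sqlite.Open("file::memory:?cache=shared"), &gorm.Config{})
	if err != nil {
		panic(err)
	}

	cs, err := carstore.NewNonArchivalCarstore(db)
	if err != nil {
		panic(err)
	}

	// With no commits recorded yet this returns an empty rev and no error.
	rev, err := cs.GetUserRepoRev(ctx, models.Uid(1))
	fmt.Println("latest rev:", rev, "err:", err)
}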
+8
-3
carstore/repo_test.go
···
30
30
return nil, nil, err
31
31
}
32
32
33
-
sharddir := filepath.Join(tempdir, "shards")
34
-
if err := os.MkdirAll(sharddir, 0775); err != nil {
33
+
sharddir1 := filepath.Join(tempdir, "shards1")
34
+
if err := os.MkdirAll(sharddir1, 0775); err != nil {
35
+
return nil, nil, err
36
+
}
37
+
38
+
sharddir2 := filepath.Join(tempdir, "shards2")
39
+
if err := os.MkdirAll(sharddir2, 0775); err != nil {
35
40
return nil, nil, err
36
41
}
37
42
···
45
50
return nil, nil, err
46
51
}
47
52
48
-
cs, err := NewCarStore(db, sharddir)
53
+
cs, err := NewCarStore(db, []string{sharddir1, sharddir2})
49
54
if err != nil {
50
55
return nil, nil, err
51
56
}
+10
-9
cmd/astrolabe/handlers.go
···
6
6
"net/http"
7
7
"strings"
8
8
9
+
"github.com/bluesky-social/indigo/api/agnostic"
9
10
comatproto "github.com/bluesky-social/indigo/api/atproto"
10
11
_ "github.com/bluesky-social/indigo/api/bsky"
11
12
"github.com/bluesky-social/indigo/atproto/data"
···
64
65
65
66
atid, err := syntax.ParseAtIdentifier(c.Param("atid"))
66
67
if err != nil {
67
-
return echo.NewHTTPError(404, fmt.Sprintf("failed to parse handle or DID"))
68
+
return echo.NewHTTPError(404, "failed to parse handle or DID")
68
69
}
69
70
70
71
ident, err := srv.dir.Lookup(ctx, *atid)
···
96
97
97
98
atid, err := syntax.ParseAtIdentifier(c.Param("atid"))
98
99
if err != nil {
99
-
return echo.NewHTTPError(400, fmt.Sprintf("failed to parse handle or DID"))
100
+
return echo.NewHTTPError(400, "failed to parse handle or DID")
100
101
}
101
102
102
103
ident, err := srv.dir.Lookup(ctx, *atid)
···
133
134
134
135
atid, err := syntax.ParseAtIdentifier(c.Param("atid"))
135
136
if err != nil {
136
-
return echo.NewHTTPError(400, fmt.Sprintf("failed to parse handle or DID"))
137
+
return echo.NewHTTPError(400, "failed to parse handle or DID")
137
138
}
138
139
139
140
collection, err := syntax.ParseNSID(c.Param("collection"))
140
141
if err != nil {
141
-
return echo.NewHTTPError(400, fmt.Sprintf("failed to parse collection NSID"))
142
+
return echo.NewHTTPError(400, "failed to parse collection NSID")
142
143
}
143
144
144
145
ident, err := srv.dir.Lookup(ctx, *atid)
···
161
162
162
163
cursor := c.QueryParam("cursor")
163
164
// collection string, cursor string, limit int64, repo string, reverse bool, rkeyEnd string, rkeyStart string
164
-
resp, err := RepoListRecords(ctx, &xrpcc, collection.String(), cursor, 100, ident.DID.String(), false, "", "")
165
+
resp, err := agnostic.RepoListRecords(ctx, &xrpcc, collection.String(), cursor, 100, ident.DID.String(), false, "", "")
165
166
if err != nil {
166
167
return err
167
168
}
···
191
192
192
193
atid, err := syntax.ParseAtIdentifier(c.Param("atid"))
193
194
if err != nil {
194
-
return echo.NewHTTPError(400, fmt.Sprintf("failed to parse handle or DID"))
195
+
return echo.NewHTTPError(400, "failed to parse handle or DID")
195
196
}
196
197
197
198
collection, err := syntax.ParseNSID(c.Param("collection"))
198
199
if err != nil {
199
-
return echo.NewHTTPError(400, fmt.Sprintf("failed to parse collection NSID"))
200
+
return echo.NewHTTPError(400, "failed to parse collection NSID")
200
201
}
201
202
202
203
rkey, err := syntax.ParseRecordKey(c.Param("rkey"))
203
204
if err != nil {
204
-
return echo.NewHTTPError(400, fmt.Sprintf("failed to parse record key"))
205
+
return echo.NewHTTPError(400, "failed to parse record key")
205
206
}
206
207
207
208
ident, err := srv.dir.Lookup(ctx, *atid)
···
218
219
xrpcc := xrpc.Client{
219
220
Host: ident.PDSEndpoint(),
220
221
}
221
-
resp, err := RepoGetRecord(ctx, &xrpcc, "", collection.String(), ident.DID.String(), rkey.String())
222
+
resp, err := agnostic.RepoGetRecord(ctx, &xrpcc, "", collection.String(), ident.DID.String(), rkey.String())
222
223
if err != nil {
223
224
return echo.NewHTTPError(400, fmt.Sprintf("failed to load record: %s", err))
224
225
}
-42
cmd/astrolabe/repogetRecord.go
···
1
-
// Copied from indigo:api/atproto/repolistRecords.go
2
-
3
-
package main
4
-
5
-
// schema: com.atproto.repo.getRecord
6
-
7
-
import (
8
-
"context"
9
-
"encoding/json"
10
-
11
-
"github.com/bluesky-social/indigo/xrpc"
12
-
)
13
-
14
-
// RepoGetRecord_Output is the output of a com.atproto.repo.getRecord call.
15
-
type RepoGetRecord_Output struct {
16
-
Cid *string `json:"cid,omitempty" cborgen:"cid,omitempty"`
17
-
Uri string `json:"uri" cborgen:"uri"`
18
-
// NOTE: changed from lex decoder to json.RawMessage
19
-
Value *json.RawMessage `json:"value" cborgen:"value"`
20
-
}
21
-
22
-
// RepoGetRecord calls the XRPC method "com.atproto.repo.getRecord".
23
-
//
24
-
// cid: The CID of the version of the record. If not specified, then return the most recent version.
25
-
// collection: The NSID of the record collection.
26
-
// repo: The handle or DID of the repo.
27
-
// rkey: The Record Key.
28
-
func RepoGetRecord(ctx context.Context, c *xrpc.Client, cid string, collection string, repo string, rkey string) (*RepoGetRecord_Output, error) {
29
-
var out RepoGetRecord_Output
30
-
31
-
params := map[string]interface{}{
32
-
"cid": cid,
33
-
"collection": collection,
34
-
"repo": repo,
35
-
"rkey": rkey,
36
-
}
37
-
if err := c.Do(ctx, xrpc.Query, "", "com.atproto.repo.getRecord", params, nil, &out); err != nil {
38
-
return nil, err
39
-
}
40
-
41
-
return &out, nil
42
-
}
-53
cmd/astrolabe/repolistRecords.go
···
1
-
// Copied from indigo:api/atproto/repolistRecords.go
2
-
3
-
package main
4
-
5
-
// schema: com.atproto.repo.listRecords
6
-
7
-
import (
8
-
"context"
9
-
"encoding/json"
10
-
11
-
"github.com/bluesky-social/indigo/xrpc"
12
-
)
13
-
14
-
// RepoListRecords_Output is the output of a com.atproto.repo.listRecords call.
15
-
type RepoListRecords_Output struct {
16
-
Cursor *string `json:"cursor,omitempty" cborgen:"cursor,omitempty"`
17
-
Records []*RepoListRecords_Record `json:"records" cborgen:"records"`
18
-
}
19
-
20
-
// RepoListRecords_Record is a "record" in the com.atproto.repo.listRecords schema.
21
-
type RepoListRecords_Record struct {
22
-
Cid string `json:"cid" cborgen:"cid"`
23
-
Uri string `json:"uri" cborgen:"uri"`
24
-
// NOTE: changed from lex decoder to json.RawMessage
25
-
Value *json.RawMessage `json:"value" cborgen:"value"`
26
-
}
27
-
28
-
// RepoListRecords calls the XRPC method "com.atproto.repo.listRecords".
29
-
//
30
-
// collection: The NSID of the record type.
31
-
// limit: The number of records to return.
32
-
// repo: The handle or DID of the repo.
33
-
// reverse: Flag to reverse the order of the returned records.
34
-
// rkeyEnd: DEPRECATED: The highest sort-ordered rkey to stop at (exclusive)
35
-
// rkeyStart: DEPRECATED: The lowest sort-ordered rkey to start from (exclusive)
36
-
func RepoListRecords(ctx context.Context, c *xrpc.Client, collection string, cursor string, limit int64, repo string, reverse bool, rkeyEnd string, rkeyStart string) (*RepoListRecords_Output, error) {
37
-
var out RepoListRecords_Output
38
-
39
-
params := map[string]interface{}{
40
-
"collection": collection,
41
-
"cursor": cursor,
42
-
"limit": limit,
43
-
"repo": repo,
44
-
"reverse": reverse,
45
-
"rkeyEnd": rkeyEnd,
46
-
"rkeyStart": rkeyStart,
47
-
}
48
-
if err := c.Do(ctx, xrpc.Query, "", "com.atproto.repo.listRecords", params, nil, &out); err != nil {
49
-
return nil, err
50
-
}
51
-
52
-
return &out, nil
53
-
}
+2
-2
cmd/beemo/Dockerfile
···
3
3
# podman build -f ./cmd/beemo/Dockerfile -t beemo .
4
4
5
5
### Compile stage
6
-
FROM golang:1.22-alpine3.19 AS build-env
6
+
FROM golang:1.23-alpine3.20 AS build-env
7
7
RUN apk add --no-cache build-base make git
8
8
9
9
ADD . /dockerbuild
···
15
15
go build -tags timetzdata -o /beemo ./cmd/beemo
16
16
17
17
### Run stage
18
-
FROM alpine:3.19
18
+
FROM alpine:3.20
19
19
20
20
RUN apk add --no-cache --update dumb-init ca-certificates
21
21
ENTRYPOINT ["dumb-init", "--"]
+2
-20
cmd/beemo/firehose_consumer.go
···
7
7
"log/slog"
8
8
"net/http"
9
9
"net/url"
10
-
"strings"
11
10
12
11
comatproto "github.com/bluesky-social/indigo/api/atproto"
13
12
appbsky "github.com/bluesky-social/indigo/api/bsky"
···
57
56
)
58
57
logger.Info("beemo firehose scheduler configured", "scheduler", "parallel", "workers", parallelism)
59
58
60
-
return events.HandleRepoStream(ctx, con, scheduler)
61
-
}
62
-
63
-
// TODO: move this to a "ParsePath" helper in syntax package?
64
-
func splitRepoPath(path string) (syntax.NSID, syntax.RecordKey, error) {
65
-
parts := strings.SplitN(path, "/", 3)
66
-
if len(parts) != 2 {
67
-
return "", "", fmt.Errorf("invalid record path: %s", path)
68
-
}
69
-
collection, err := syntax.ParseNSID(parts[0])
70
-
if err != nil {
71
-
return "", "", err
72
-
}
73
-
rkey, err := syntax.ParseRecordKey(parts[1])
74
-
if err != nil {
75
-
return "", "", err
76
-
}
77
-
return collection, rkey, nil
59
+
return events.HandleRepoStream(ctx, con, scheduler, logger)
78
60
}
79
61
80
62
// NOTE: for now, this function basically never errors, just logs and returns nil. Should think through error processing better.
···
102
84
103
85
for _, op := range evt.Ops {
104
86
logger = logger.With("eventKind", op.Action, "path", op.Path)
105
-
collection, rkey, err := splitRepoPath(op.Path)
87
+
collection, rkey, err := syntax.ParseRepoPath(op.Path)
106
88
if err != nil {
107
89
logger.Error("invalid path in repo op")
108
90
return nil
+1
cmd/beemo/notify_reports.go
+2
-2
cmd/bigsky/Dockerfile
···
3
3
# podman build -f ./cmd/bigsky/Dockerfile -t bigsky .
4
4
5
5
### Compile stage
6
-
FROM golang:1.22-alpine3.19 AS build-env
6
+
FROM golang:1.23-alpine3.20 AS build-env
7
7
RUN apk add --no-cache build-base make git
8
8
9
9
ADD . /dockerbuild
···
26
26
RUN yarn build
27
27
28
28
### Run stage
29
-
FROM alpine:3.19
29
+
FROM alpine:3.20
30
30
31
31
RUN apk add --no-cache --update dumb-init ca-certificates runit
32
32
ENTRYPOINT ["dumb-init", "--"]
+224
cmd/bigsky/copy_pdses.py
···
1
+
#!/usr/bin/env python3
2
+
#
3
+
# pip install requests
4
+
#
5
+
# python3 copy_pdses.py --admin-key hunter2 --source-url http://srcrelay:2470 --dest-url http://destrelay:2470
6
+
7
+
import json
8
+
import logging
9
+
import sys
10
+
import urllib.parse
11
+
12
+
import requests
13
+
14
+
logger = logging.getLogger(__name__)
15
+
16
+
class relay:
17
+
def __init__(self, rooturl, headers=None, session=None):
18
+
"rooturl string, headers dict or None, session requests.Session() or None"
19
+
self.rooturl = rooturl
20
+
self.headers = headers or dict()
21
+
self.session = session or requests.Session()
22
+
23
+
def crawl(self, host):
24
+
pheaders = dict(self.headers)
25
+
pheaders['Content-Type'] = 'application/json'
26
+
url = urllib.parse.urljoin(self.rooturl, '/admin/pds/requestCrawl')
27
+
response = self.session.post(url, headers=pheaders, data=json.dumps({"hostname": host}))
28
+
if response.status_code != 200:
29
+
return False
30
+
return True
31
+
32
+
def crawlAndSetLimits(self, host, limits):
33
+
"host string, limits dict"
34
+
if not self.crawl(host):
35
+
logger.error("%s %s : %d %r", url, host, response.status_code, response.text)
36
+
return
37
+
if limits is None:
38
+
logger.debug("requestCrawl %s OK", host)
39
+
elif self.setLimits(host, limits):
40
+
logger.debug("requestCrawl + changeLimits %s OK", host)
41
+
def setLimits(self, host, limits):
42
+
url = urllib.parse.urljoin(self.rooturl, '/admin/pds/changeLimits')
43
+
plimits = dict(limits)
44
+
plimits["host"] = host
45
+
pheaders = dict(self.headers)
46
+
pheaders['Content-Type'] = 'application/json'
47
+
response = self.session.post(url, headers=pheaders, data=json.dumps(plimits))
48
+
if response.status_code != 200:
49
+
logger.error("%s %s : %d %r", url, host, response.status_code, response.text)
50
+
return False
51
+
return True
52
+
53
+
def crawlAndBlock(self, host):
54
+
"make relay aware of PDS, and block it"
55
+
if not self.crawl(host):
56
+
logger.error("%s %s : %d %r", url, host, response.status_code, response.text)
57
+
return
58
+
if self.block(host):
59
+
logger.debug("requestCrawl + block %s OK", host)
60
+
61
+
def block(self, host):
62
+
url = urllib.parse.urljoin(self.rooturl, '/admin/pds/block')
63
+
response = self.session.post(url, headers=self.headers, data='', params={"host":host})
64
+
if response.status_code != 200:
65
+
logger.error("%s %s : %d %r", url, host, response.status_code, response.text)
66
+
return False
67
+
return True
68
+
69
+
def unblock(self, host):
70
+
url = urllib.parse.urljoin(self.rooturl, '/admin/pds/unblock')
71
+
response = self.session.post(url, headers=self.headers, data='', params={"host":host})
72
+
if response.status_code != 200:
73
+
logger.error("%s %s : %d %r", url, host, response.status_code, response.text)
74
+
return False
75
+
return True
76
+
77
+
def pdsList(self):
78
+
"GET /admin/pds/list"
79
+
url = urllib.parse.urljoin(self.rooturl, '/admin/pds/list')
80
+
response = self.session.get(url, headers=self.headers)
81
+
if response.status_code != 200:
82
+
logger.error("%s : %d %r", url, response.status_code, response.text)
83
+
return None
84
+
return response.json()
85
+
86
+
def makeByHost(they):
87
+
out = dict()
88
+
for rec in they:
89
+
out[rec['Host']] = rec
90
+
return out
91
+
92
+
def makeLimits(rec):
93
+
"for submitting to changeLimits"
94
+
return {
95
+
"host": rec['Host'],
96
+
"per_second":rec['RateLimit'],
97
+
"per_hour":rec['HourlyEventLimit'],
98
+
"per_day":rec['DailyEventLimit'],
99
+
"crawl_rate":rec['CrawlRateLimit'],
100
+
"repo_limit":rec['RepoLimit'],
101
+
}
102
+
103
+
def makeRequestCrawl(rec):
104
+
"for submitting to requestCrawl"
105
+
return {"hostname":rec["Host"]}
106
+
107
+
def de(a,b):
108
+
# dict equal
109
+
for ka, va in a.items():
110
+
vb = b[ka]
111
+
if (va is None) and (vb is None):
112
+
continue
113
+
if va == vb:
114
+
continue
115
+
return False
116
+
for kb in b.keys():
117
+
if kb not in a:
118
+
return False
119
+
return True
120
+
121
+
def main():
122
+
import argparse
123
+
ap = argparse.ArgumentParser()
124
+
ap.add_argument('--admin-key', default=None, help='relay auth bearer token', required=True)
125
+
ap.add_argument('--source-url', default=None, help='base url to GET /admin/pds/list')
126
+
ap.add_argument('--source-json', default=None, help='load /admin/pds/list json from file')
127
+
ap.add_argument('--dest-url', default=None, help='dest URL to POST requestCrawl etc to')
128
+
ap.add_argument('--dry-run', default=False, action='store_true')
129
+
ap.add_argument('--verbose', default=False, action='store_true')
130
+
args = ap.parse_args()
131
+
132
+
if args.verbose:
133
+
logging.basicConfig(level=logging.DEBUG)
134
+
else:
135
+
logging.basicConfig(level=logging.INFO)
136
+
137
+
headers = {'Authorization': 'Bearer ' + args.admin_key}
138
+
139
+
if args.source_json:
140
+
with open(args.source_json, 'rt') as fin:
141
+
sourceList = json.load(fin)
142
+
elif args.source_url:
143
+
relaySession = relay(args.source_url, headers)
144
+
sourceList = relaySession.pdsList()
145
+
else:
146
+
sys.stdout.write("need --source-url or --source-json\n")
147
+
sys.exit(1)
148
+
149
+
r2 = relay(args.dest_url, headers)
150
+
destList = r2.pdsList()
151
+
152
+
source = makeByHost(sourceList)
153
+
dests = makeByHost(destList)
154
+
155
+
snotd = []
156
+
dnots = []
157
+
diflim = []
158
+
difblock = []
159
+
recrawl = []
160
+
161
+
for k1, v1 in source.items():
162
+
v2 = dests.get(k1)
163
+
if v2 is None:
164
+
snotd.append(v1)
165
+
continue
166
+
lim1 = makeLimits(v1)
167
+
lim2 = makeLimits(v2)
168
+
if v1["Blocked"] != v2["Blocked"]:
169
+
difblock.append((k1,v1["Blocked"]))
170
+
if v1["Blocked"]:
171
+
continue
172
+
if not de(lim1, lim2):
173
+
diflim.append(lim1)
174
+
if v1["HasActiveConnection"] and not v2["HasActiveConnection"]:
175
+
recrawl.append(k1)
176
+
for k2 in dests.keys():
177
+
if k2 not in source:
178
+
dnots.append(k2)
179
+
180
+
logger.debug("%d source not dest", len(snotd))
181
+
for rec in snotd:
182
+
if rec["Blocked"]:
183
+
if args.dry_run:
184
+
sys.stdout.write("crawl and block: {!r}\n".format(rec["Host"]))
185
+
else:
186
+
r2.crawlAndBlock(rec["Host"])
187
+
else:
188
+
limits = makeLimits(rec)
189
+
if args.dry_run:
190
+
sys.stdout.write("crawl and limit: {}\n".format(json.dumps(limits)))
191
+
else:
192
+
r2.crawlAndSetLimits(rec["Host"], limits)
193
+
logger.debug("adjust limits: %d", len(diflim))
194
+
for limits in diflim:
195
+
if args.dry_run:
196
+
sys.stdout.write("set limits: {}\n".format(json.dumps(limits)))
197
+
else:
198
+
r2.setLimits(limits["host"], limits)
199
+
logger.debug("adjust block status: %d", len(difblock))
200
+
for host, blocked in difblock:
201
+
if args.dry_run:
202
+
sys.stdout.write("{} block={}\n".format(host, blocked))
203
+
else:
204
+
if blocked:
205
+
r2.block(host)
206
+
else:
207
+
r2.unblock(host)
208
+
logger.debug("restart requestCrawl: %d", len(recrawl))
209
+
for host in recrawl:
210
+
if args.dry_run:
211
+
logger.info("requestCrawl %s", host)
212
+
else:
213
+
if r2.crawl(host):
214
+
logger.debug("requestCrawl %s OK", host)
215
+
logger.info("%d in dest but not source", len(dnots))
216
+
for k2 in dnots:
217
+
logger.debug("%s", k2)
218
+
219
+
220
+
221
+
222
+
223
+
if __name__ == '__main__':
224
+
main()
+111
-31
cmd/bigsky/main.go
···
3
3
import (
4
4
"context"
5
5
"fmt"
6
+
"log/slog"
6
7
"net/http"
7
8
_ "net/http/pprof"
9
+
"net/url"
8
10
"os"
9
11
"os/signal"
10
12
"path/filepath"
···
29
31
_ "go.uber.org/automaxprocs"
30
32
31
33
"github.com/carlmjohnson/versioninfo"
32
-
logging "github.com/ipfs/go-log"
33
34
"github.com/urfave/cli/v2"
34
35
"go.opentelemetry.io/otel"
35
36
"go.opentelemetry.io/otel/attribute"
···
41
42
"gorm.io/plugin/opentelemetry/tracing"
42
43
)
43
44
44
-
var log = logging.Logger("bigsky")
45
+
var log = slog.Default().With("system", "bigsky")
45
46
46
47
func init() {
47
48
// control log level using, eg, GOLOG_LOG_LEVEL=debug
···
50
51
51
52
func main() {
52
53
if err := run(os.Args); err != nil {
53
-
log.Fatal(err)
54
+
slog.Error(err.Error())
55
+
os.Exit(1)
54
56
}
55
57
}
56
58
···
189
191
EnvVars: []string{"RELAY_DID_CACHE_SIZE"},
190
192
Value: 5_000_000,
191
193
},
194
+
&cli.StringSliceFlag{
195
+
Name: "did-memcached",
196
+
EnvVars: []string{"RELAY_DID_MEMCACHED"},
197
+
},
192
198
&cli.DurationFlag{
193
199
Name: "event-playback-ttl",
194
200
Usage: "time to live for event playback buffering (only applies to disk persister)",
···
200
206
EnvVars: []string{"RELAY_NUM_COMPACTION_WORKERS"},
201
207
Value: 2,
202
208
},
209
+
&cli.StringSliceFlag{
210
+
Name: "carstore-shard-dirs",
211
+
Usage: "specify list of shard directories for carstore storage, overrides default storage within datadir",
212
+
EnvVars: []string{"RELAY_CARSTORE_SHARD_DIRS"},
213
+
},
214
+
&cli.StringSliceFlag{
215
+
Name: "next-crawler",
216
+
Usage: "forward POST requestCrawl to this url, should be machine root url and not xrpc/requestCrawl, comma separated list",
217
+
EnvVars: []string{"RELAY_NEXT_CRAWLER"},
218
+
},
219
+
&cli.BoolFlag{
220
+
Name: "non-archival",
221
+
EnvVars: []string{"RELAY_NON_ARCHIVAL"},
222
+
Value: false,
223
+
},
203
224
}
204
225
205
226
app.Action = runBigsky
···
213
234
env = "dev"
214
235
}
215
236
if cctx.Bool("jaeger") {
216
-
url := "http://localhost:14268/api/traces"
217
-
exp, err := jaeger.New(jaeger.WithCollectorEndpoint(jaeger.WithEndpoint(url)))
237
+
jaegerUrl := "http://localhost:14268/api/traces"
238
+
exp, err := jaeger.New(jaeger.WithCollectorEndpoint(jaeger.WithEndpoint(jaegerUrl)))
218
239
if err != nil {
219
240
return err
220
241
}
···
240
261
// At a minimum, you need to set
241
262
// OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
242
263
if ep := cctx.String("otel-exporter-otlp-endpoint"); ep != "" {
243
-
log.Infow("setting up trace exporter", "endpoint", ep)
264
+
slog.Info("setting up trace exporter", "endpoint", ep)
244
265
ctx, cancel := context.WithCancel(context.Background())
245
266
defer cancel()
246
267
247
268
exp, err := otlptracehttp.New(ctx)
248
269
if err != nil {
249
-
log.Fatalw("failed to create trace exporter", "error", err)
270
+
slog.Error("failed to create trace exporter", "error", err)
271
+
os.Exit(1)
250
272
}
251
273
defer func() {
252
274
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
253
275
defer cancel()
254
276
if err := exp.Shutdown(ctx); err != nil {
255
-
log.Errorw("failed to shutdown trace exporter", "error", err)
277
+
slog.Error("failed to shutdown trace exporter", "error", err)
256
278
}
257
279
}()
258
280
···
277
299
signals := make(chan os.Signal, 1)
278
300
signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
279
301
302
+
_, err := cliutil.SetupSlog(cliutil.LogOptions{})
303
+
if err != nil {
304
+
return err
305
+
}
306
+
280
307
// start observability/tracing (OTEL and jaeger)
281
308
if err := setupOTEL(cctx); err != nil {
282
309
return err
···
289
316
return err
290
317
}
291
318
292
-
log.Infow("setting up main database")
319
+
slog.Info("setting up main database")
293
320
dburl := cctx.String("db-url")
294
321
db, err := cliutil.SetupDatabase(dburl, cctx.Int("max-metadb-connections"))
295
322
if err != nil {
296
323
return err
297
324
}
298
325
299
-
log.Infow("setting up carstore database")
326
+
slog.Info("setting up carstore database")
300
327
csdburl := cctx.String("carstore-db-url")
301
328
csdb, err := cliutil.SetupDatabase(csdburl, cctx.Int("max-carstore-connections"))
302
329
if err != nil {
···
312
339
}
313
340
}
314
341
315
-
os.MkdirAll(filepath.Dir(csdir), os.ModePerm)
316
-
cstore, err := carstore.NewCarStore(csdb, csdir)
317
-
if err != nil {
318
-
return err
342
+
csdirs := []string{csdir}
343
+
if paramDirs := cctx.StringSlice("carstore-shard-dirs"); len(paramDirs) > 0 {
344
+
csdirs = paramDirs
319
345
}
320
346
321
-
mr := did.NewMultiResolver()
347
+
for _, csd := range csdirs {
348
+
if err := os.MkdirAll(filepath.Dir(csd), os.ModePerm); err != nil {
349
+
return err
350
+
}
351
+
}
322
352
323
-
didr := &api.PLCServer{Host: cctx.String("plc-host")}
324
-
mr.AddHandler("plc", didr)
353
+
var cstore carstore.CarStore
325
354
326
-
webr := did.WebResolver{}
327
-
if cctx.Bool("crawl-insecure-ws") {
328
-
webr.Insecure = true
355
+
if cctx.Bool("non-archival") {
356
+
cs, err := carstore.NewNonArchivalCarstore(csdb)
357
+
if err != nil {
358
+
return err
359
+
}
360
+
361
+
cstore = cs
362
+
} else {
363
+
cs, err := carstore.NewCarStore(csdb, csdirs)
364
+
if err != nil {
365
+
return err
366
+
}
367
+
368
+
cstore = cs
329
369
}
330
-
mr.AddHandler("web", &webr)
370
+
371
+
// DID RESOLUTION
372
+
	// 1. the outside world, PLCServer or Web
373
+
// 2. (maybe memcached)
374
+
// 3. in-process cache
375
+
var cachedidr did.Resolver
376
+
{
377
+
mr := did.NewMultiResolver()
331
378
332
-
cachedidr := plc.NewCachingDidResolver(mr, time.Hour*24, cctx.Int("did-cache-size"))
379
+
didr := &api.PLCServer{Host: cctx.String("plc-host")}
380
+
mr.AddHandler("plc", didr)
381
+
382
+
webr := did.WebResolver{}
383
+
if cctx.Bool("crawl-insecure-ws") {
384
+
webr.Insecure = true
385
+
}
386
+
mr.AddHandler("web", &webr)
387
+
388
+
var prevResolver did.Resolver
389
+
memcachedServers := cctx.StringSlice("did-memcached")
390
+
if len(memcachedServers) > 0 {
391
+
prevResolver = plc.NewMemcachedDidResolver(mr, time.Hour*24, memcachedServers)
392
+
} else {
393
+
prevResolver = mr
394
+
}
395
+
396
+
cachedidr = plc.NewCachingDidResolver(prevResolver, time.Hour*24, cctx.Int("did-cache-size"))
397
+
}
333
398
334
399
kmgr := indexer.NewKeyManager(cachedidr, nil)
335
400
···
338
403
var persister events.EventPersistence
339
404
340
405
if dpd := cctx.String("disk-persister-dir"); dpd != "" {
341
-
log.Infow("setting up disk persister")
406
+
slog.Info("setting up disk persister")
342
407
343
408
pOpts := events.DefaultDiskPersistOptions()
344
409
pOpts.Retention = cctx.Duration("event-playback-ttl")
···
361
426
362
427
rf := indexer.NewRepoFetcher(db, repoman, cctx.Int("max-fetch-concurrency"))
363
428
364
-
ix, err := indexer.NewIndexer(db, notifman, evtman, cachedidr, rf, true, cctx.Bool("spidering"), false)
429
+
ix, err := indexer.NewIndexer(db, notifman, evtman, cachedidr, rf, true, false, cctx.Bool("spidering"))
365
430
if err != nil {
366
431
return err
367
432
}
433
+
defer ix.Shutdown()
368
434
369
435
rlskip := cctx.String("bsky-social-rate-limit-skip")
370
436
ix.ApplyPDSClientSettings = func(c *xrpc.Client) {
···
387
453
388
454
repoman.SetEventHandler(func(ctx context.Context, evt *repomgr.RepoEvent) {
389
455
if err := ix.HandleRepoEvent(ctx, evt); err != nil {
390
-
log.Errorw("failed to handle repo event", "err", err)
456
+
slog.Error("failed to handle repo event", "err", err)
391
457
}
392
458
}, false)
393
459
···
411
477
}
412
478
}
413
479
414
-
log.Infow("constructing bgs")
480
+
slog.Info("constructing bgs")
415
481
bgsConfig := libbgs.DefaultBGSConfig()
416
482
bgsConfig.SSL = !cctx.Bool("crawl-insecure-ws")
417
483
bgsConfig.CompactInterval = cctx.Duration("compact-interval")
···
419
485
bgsConfig.MaxQueuePerPDS = cctx.Int64("max-queue-per-pds")
420
486
bgsConfig.DefaultRepoLimit = cctx.Int64("default-repo-limit")
421
487
bgsConfig.NumCompactionWorkers = cctx.Int("num-compaction-workers")
488
+
nextCrawlers := cctx.StringSlice("next-crawler")
489
+
if len(nextCrawlers) != 0 {
490
+
nextCrawlerUrls := make([]*url.URL, len(nextCrawlers))
491
+
for i, tu := range nextCrawlers {
492
+
var err error
493
+
nextCrawlerUrls[i], err = url.Parse(tu)
494
+
if err != nil {
495
+
return fmt.Errorf("failed to parse next-crawler url: %w", err)
496
+
}
497
+
slog.Info("configuring relay for requestCrawl", "host", nextCrawlerUrls[i])
498
+
}
499
+
bgsConfig.NextCrawlers = nextCrawlerUrls
500
+
}
422
501
bgs, err := libbgs.NewBGS(db, ix, repoman, evtman, cachedidr, rf, hr, bgsConfig)
423
502
if err != nil {
424
503
return err
···
433
512
// set up metrics endpoint
434
513
go func() {
435
514
if err := bgs.StartMetrics(cctx.String("metrics-listen")); err != nil {
436
-
log.Fatalf("failed to start metrics endpoint: %s", err)
515
+
log.Error("failed to start metrics endpoint", "err", err)
516
+
os.Exit(1)
437
517
}
438
518
}()
439
519
···
444
524
bgsErr <- err
445
525
}()
446
526
447
-
log.Infow("startup complete")
527
+
slog.Info("startup complete")
448
528
select {
449
529
case <-signals:
450
530
log.Info("received shutdown signal")
451
531
errs := bgs.Shutdown()
452
532
for err := range errs {
453
-
log.Errorw("error during BGS shutdown", "err", err)
533
+
slog.Error("error during BGS shutdown", "err", err)
454
534
}
455
535
case err := <-bgsErr:
456
536
if err != nil {
457
-
log.Errorw("error during BGS startup", "err", err)
537
+
slog.Error("error during BGS startup", "err", err)
458
538
}
459
539
log.Info("shutting down")
460
540
errs := bgs.Shutdown()
461
541
for err := range errs {
462
-
log.Errorw("error during BGS shutdown", "err", err)
542
+
slog.Error("error during BGS shutdown", "err", err)
463
543
}
464
544
}
465
545
+115
cmd/bigsky/resync_pdses.py
···
1
+
#!/usr/bin/env python3
2
+
#
3
+
# pip install requests
4
+
#
5
+
# python3 resync_pdses.py --admin-key hunter2 --url http://myrelay:2470 host_per_line.txt
6
+
7
+
import json
8
+
import sys
9
+
import urllib.parse
10
+
11
+
import requests
12
+
13
+
14
+
# pds limits for POST /admin/pds/changeLimits
15
+
# {"host":"", "per_second": int, "per_hour": int, "per_day": int, "crawl_rate": int, "repo_limit": int}
16
+
17
+
limitsKeys = ('per_second', 'per_hour', 'per_day', 'crawl_rate', 'repo_limit')
18
+
19
+
def checkLimits(limits):
20
+
for k in limits.keys():
21
+
if k not in limitsKeys:
22
+
raise Exception(f"unknown pds rate limits key {k!r}")
23
+
return True
24
+
25
+
class relay:
26
+
def __init__(self, rooturl, headers=None, session=None):
27
+
"rooturl string, headers dict or None, session requests.Session() or None"
28
+
self.rooturl = rooturl
29
+
self.headers = headers or dict()
30
+
self.session = session or requests.Session()
31
+
32
+
def resync(self, host):
33
+
"host string"
34
+
url = urllib.parse.urljoin(self.rooturl, '/admin/pds/resync')
35
+
response = self.session.post(url, params={"host": host}, headers=self.headers, data='')
36
+
if response.status_code != 200:
37
+
sys.stderr.write(f"{url}?host={host} : ({response.status_code}) ({response.text!r})\n")
38
+
else:
39
+
sys.stderr.write(f"{url}?host={host} : OK\n")
40
+
41
+
def crawlAndSetLimits(self, host, limits):
42
+
"host string, limits dict"
43
+
pheaders = dict(self.headers)
44
+
pheaders['Content-Type'] = 'application/json'
45
+
url = urllib.parse.urljoin(self.rooturl, '/admin/pds/requestCrawl')
46
+
response = self.session.post(url, headers=pheaders, data=json.dumps({"hostname": host}))
47
+
if response.status_code != 200:
48
+
sys.stderr.write(f"{url} {host} : {response.status_code} {response.text!r}\n")
49
+
return
50
+
if limits is None:
51
+
sys.stderr.write(f"requestCrawl {host} OK\n")
52
+
url = urllib.parse.urljoin(self.rooturl, '/admin/pds/changeLimits')
53
+
plimits = dict(limits)
54
+
plimits["host"] = host
55
+
response = self.session.post(url, headers=pheaders, data=json.dumps(plimits))
56
+
if response.status_code != 200:
57
+
sys.stderr.write(f"{url} {host} : {response.status_code} {response.text!r}\n")
58
+
return
59
+
sys.stderr.write(f"requestCrawl + changeLimits {host} OK\n")
60
+
61
+
def main():
62
+
import argparse
63
+
ap = argparse.ArgumentParser()
64
+
ap.add_argument('input', default='-', help='host per line text file to read, - for stdin')
65
+
ap.add_argument('--admin-key', default=None, help='relay auth bearer token', required=True)
66
+
ap.add_argument('--url', default=None, help='base url to POST /admin/pds/resync', required=True)
67
+
ap.add_argument('--resync', default=False, action='store_true', help='resync selected PDSes')
68
+
ap.add_argument('--limits', default=None, help='json pds rate limits')
69
+
ap.add_argument('--crawl', default=False, action='store_true', help='crawl & set limits')
70
+
args = ap.parse_args()
71
+
72
+
headers = {'Authorization': 'Bearer ' + args.admin_key}
73
+
74
+
relaySession = relay(args.url, headers)
75
+
76
+
#url = urllib.parse.urljoin(args.url, '/admin/pds/resync')
77
+
78
+
#sess = requests.Session()
79
+
if args.crawl and args.resync:
80
+
sys.stderr.write("should only specify one of --resync --crawl")
81
+
sys.exit(1)
82
+
if (not args.crawl) and (not args.resync):
83
+
sys.stderr.write("should specify one of --resync --crawl")
84
+
sys.exit(1)
85
+
86
+
limits = None
87
+
if args.limits:
88
+
limits = json.loads(args.limits)
89
+
checkLimits(limits)
90
+
91
+
if args.input == '-':
92
+
fin = sys.stdin
93
+
else:
94
+
fin = open(args.input, 'rt')
95
+
for line in fin:
96
+
if not line:
97
+
continue
98
+
line = line.strip()
99
+
if not line:
100
+
continue
101
+
if line[0] == '#':
102
+
continue
103
+
host = line
104
+
if args.crawl:
105
+
relaySession.crawlAndSetLimits(host, limits)
106
+
elif args.resync:
107
+
relaySession.resync(host)
108
+
# response = sess.post(url, params={"host": line}, headers=headers)
109
+
# if response.status_code != 200:
110
+
# sys.stderr.write(f"{url}?host={line} : ({response.status_code}) ({response.text!r})\n")
111
+
# else:
112
+
# sys.stderr.write(f"{url}?host={line} : OK\n")
113
+
114
+
if __name__ == '__main__':
115
+
main()
+6
-1
cmd/goat/account.go
···
38
38
EnvVars: []string{"ATP_AUTH_PASSWORD"},
39
39
},
40
40
&cli.StringFlag{
41
+
Name: "auth-factor-token",
42
+
Usage: "token required if password is used and 2fa is required",
43
+
EnvVars: []string{"ATP_AUTH_FACTOR_TOKEN"},
44
+
},
45
+
&cli.StringFlag{
41
46
Name: "pds-host",
42
47
Usage: "URL of the PDS to create account on (overrides DID doc)",
43
48
EnvVars: []string{"ATP_PDS_HOST"},
···
163
168
return err
164
169
}
165
170
166
-
_, err = refreshAuthSession(ctx, *username, cctx.String("app-password"), cctx.String("pds-host"))
171
+
_, err = refreshAuthSession(ctx, *username, cctx.String("app-password"), cctx.String("pds-host"), cctx.String("auth-factor-token"))
167
172
return err
168
173
}
169
174
+7
-6
cmd/goat/account_migrate.go
···
9
9
"strings"
10
10
"time"
11
11
12
+
"github.com/bluesky-social/indigo/api/agnostic"
12
13
comatproto "github.com/bluesky-social/indigo/api/atproto"
13
14
"github.com/bluesky-social/indigo/atproto/syntax"
14
15
"github.com/bluesky-social/indigo/xrpc"
···
166
167
167
168
slog.Info("migrating preferences")
168
169
// TODO: service proxy header for AppView?
169
-
prefResp, err := ActorGetPreferences(ctx, oldClient)
170
+
prefResp, err := agnostic.ActorGetPreferences(ctx, oldClient)
170
171
if err != nil {
171
172
return fmt.Errorf("failed fetching old preferences: %w", err)
172
173
}
173
-
err = ActorPutPreferences(ctx, &newClient, &ActorPutPreferences_Input{
174
+
err = agnostic.ActorPutPreferences(ctx, &newClient, &agnostic.ActorPutPreferences_Input{
174
175
Preferences: prefResp.Preferences,
175
176
})
176
177
if err != nil {
···
214
215
	// NOTE: to work with did:web or non-PDS-managed did:plc, need to do manual migration process
215
216
slog.Info("updating identity to new host")
216
217
217
-
credsResp, err := IdentityGetRecommendedDidCredentials(ctx, &newClient)
218
+
credsResp, err := agnostic.IdentityGetRecommendedDidCredentials(ctx, &newClient)
218
219
if err != nil {
219
220
return fmt.Errorf("failed fetching new credentials: %w", err)
220
221
}
···
223
224
return nil
224
225
}
225
226
226
-
var unsignedOp IdentitySignPlcOperation_Input
227
+
var unsignedOp agnostic.IdentitySignPlcOperation_Input
227
228
if err = json.Unmarshal(credsBytes, &unsignedOp); err != nil {
228
229
return fmt.Errorf("failed parsing PLC op: %w", err)
229
230
}
···
231
232
232
233
// NOTE: could add additional sanity checks here that any extra rotation keys were retained, and that old alsoKnownAs and service entries are retained? The stakes aren't super high for the later, as PLC has the full history. PLC and the new PDS already implement some basic sanity checks.
233
234
234
-
signedPlcOpResp, err := IdentitySignPlcOperation(ctx, oldClient, &unsignedOp)
235
+
signedPlcOpResp, err := agnostic.IdentitySignPlcOperation(ctx, oldClient, &unsignedOp)
235
236
if err != nil {
236
237
return fmt.Errorf("failed requesting PLC operation signature: %w", err)
237
238
}
238
239
239
-
err = IdentitySubmitPlcOperation(ctx, &newClient, &IdentitySubmitPlcOperation_Input{
240
+
err = agnostic.IdentitySubmitPlcOperation(ctx, &newClient, &agnostic.IdentitySubmitPlcOperation_Input{
240
241
Operation: signedPlcOpResp.Operation,
241
242
})
242
243
if err != nil {
+5
-4
cmd/goat/account_plc.go
···
6
6
"fmt"
7
7
"os"
8
8
9
+
"github.com/bluesky-social/indigo/api/agnostic"
9
10
comatproto "github.com/bluesky-social/indigo/api/atproto"
10
11
11
12
"github.com/urfave/cli/v2"
···
56
57
return err
57
58
}
58
59
59
-
resp, err := IdentityGetRecommendedDidCredentials(ctx, xrpcc)
60
+
resp, err := agnostic.IdentityGetRecommendedDidCredentials(ctx, xrpcc)
60
61
if err != nil {
61
62
return err
62
63
}
···
109
110
return err
110
111
}
111
112
112
-
var body IdentitySignPlcOperation_Input
113
+
var body agnostic.IdentitySignPlcOperation_Input
113
114
if err = json.Unmarshal(fileBytes, &body); err != nil {
114
115
return fmt.Errorf("failed decoding PLC op JSON: %w", err)
115
116
}
···
119
120
body.Token = &token
120
121
}
121
122
122
-
resp, err := IdentitySignPlcOperation(ctx, xrpcc, &body)
123
+
resp, err := agnostic.IdentitySignPlcOperation(ctx, xrpcc, &body)
123
124
if err != nil {
124
125
return err
125
126
}
···
158
159
return fmt.Errorf("failed decoding PLC op JSON: %w", err)
159
160
}
160
161
161
-
err = IdentitySubmitPlcOperation(ctx, xrpcc, &IdentitySubmitPlcOperation_Input{
162
+
err = agnostic.IdentitySubmitPlcOperation(ctx, xrpcc, &agnostic.IdentitySubmitPlcOperation_Input{
162
163
Operation: &op,
163
164
})
164
165
if err != nil {
+2
-2
cmd/goat/actorgetPreferences.go
api/agnostic/actorgetPreferences.go
+2
-2
cmd/goat/actorputPreferences.go
api/agnostic/actorputPreferences.go
+10
-5
cmd/goat/auth.go
···
77
77
}
78
78
resp, err := comatproto.ServerRefreshSession(ctx, &client)
79
79
if err != nil {
80
-
// TODO: if failure, try creating a new session from password
80
+
// TODO: if failure, try creating a new session from password (2fa tokens are only valid once, so not reused)
81
81
fmt.Println("trying to refresh auth from password...")
82
-
as, err := refreshAuthSession(ctx, sess.DID.AtIdentifier(), sess.Password, sess.PDS)
82
+
as, err := refreshAuthSession(ctx, sess.DID.AtIdentifier(), sess.Password, sess.PDS, "")
83
83
if err != nil {
84
84
return nil, err
85
85
}
···
96
96
return &client, nil
97
97
}
98
98
99
-
func refreshAuthSession(ctx context.Context, username syntax.AtIdentifier, password, pdsURL string) (*AuthSession, error) {
99
+
func refreshAuthSession(ctx context.Context, username syntax.AtIdentifier, password, pdsURL, authFactorToken string) (*AuthSession, error) {
100
100
101
101
var did syntax.DID
102
102
if pdsURL == "" {
···
120
120
client := xrpc.Client{
121
121
Host: pdsURL,
122
122
}
123
+
var token *string
124
+
if authFactorToken != "" {
125
+
token = &authFactorToken
126
+
}
123
127
sess, err := comatproto.ServerCreateSession(ctx, &client, &comatproto.ServerCreateSession_Input{
124
-
Identifier: username.String(),
125
-
Password: password,
128
+
Identifier: username.String(),
129
+
Password: password,
130
+
AuthFactorToken: token,
126
131
})
127
132
if err != nil {
128
133
return nil, err
+10
-1
cmd/goat/blob.go
···
28
28
Aliases: []string{"o"},
29
29
Usage: "directory to store blobs in",
30
30
},
31
+
&cli.StringFlag{
32
+
Name: "pds-host",
33
+
Usage: "URL of the PDS to export blobs from (overrides DID doc)",
34
+
},
31
35
},
32
36
Action: runBlobExport,
33
37
},
···
73
77
return err
74
78
}
75
79
80
+
pdsHost := cctx.String("pds-host")
81
+
if pdsHost == "" {
82
+
pdsHost = ident.PDSEndpoint()
83
+
}
84
+
76
85
// create a new API client to connect to the account's PDS
77
86
xrpcc := xrpc.Client{
78
-
Host: ident.PDSEndpoint(),
87
+
Host: pdsHost,
79
88
}
80
89
if xrpcc.Host == "" {
81
90
return fmt.Errorf("no PDS endpoint for identity")
+4
-2
cmd/goat/bsky_prefs.go
···
6
6
"fmt"
7
7
"os"
8
8
9
+
"github.com/bluesky-social/indigo/api/agnostic"
10
+
9
11
"github.com/urfave/cli/v2"
10
12
)
11
13
···
39
41
}
40
42
41
43
// TODO: does indigo API code crash with unsupported preference '$type'? Eg "Lexicon decoder" with unsupported type.
42
-
resp, err := ActorGetPreferences(ctx, xrpcc)
44
+
resp, err := agnostic.ActorGetPreferences(ctx, xrpcc)
43
45
if err != nil {
44
46
return fmt.Errorf("failed fetching old preferences: %w", err)
45
47
}
···
77
79
return err
78
80
}
79
81
80
-
err = ActorPutPreferences(ctx, xrpcc, &ActorPutPreferences_Input{
82
+
err = agnostic.ActorPutPreferences(ctx, xrpcc, &agnostic.ActorPutPreferences_Input{
81
83
Preferences: prefsArray,
82
84
})
83
85
if err != nil {
+2
-19
cmd/goat/firehose.go
···
130
130
rsc.EventHandler,
131
131
)
132
132
slog.Info("starting firehose consumer", "relayHost", relayHost)
133
-
return events.HandleRepoStream(ctx, con, scheduler)
134
-
}
135
-
136
-
// TODO: move this to a "ParsePath" helper in syntax package?
137
-
func splitRepoPath(path string) (syntax.NSID, syntax.RecordKey, error) {
138
-
parts := strings.SplitN(path, "/", 3)
139
-
if len(parts) != 2 {
140
-
return "", "", fmt.Errorf("invalid record path: %s", path)
141
-
}
142
-
collection, err := syntax.ParseNSID(parts[0])
143
-
if err != nil {
144
-
return "", "", err
145
-
}
146
-
rkey, err := syntax.ParseRecordKey(parts[1])
147
-
if err != nil {
148
-
return "", "", err
149
-
}
150
-
return collection, rkey, nil
133
+
return events.HandleRepoStream(ctx, con, scheduler, nil)
151
134
}
152
135
153
136
func (gfc *GoatFirehoseConsumer) handleIdentityEvent(ctx context.Context, evt *comatproto.SyncSubscribeRepos_Identity) error {
···
229
212
}
230
213
231
214
for _, op := range evt.Ops {
232
-
collection, rkey, err := splitRepoPath(op.Path)
215
+
collection, rkey, err := syntax.ParseRepoPath(op.Path)
233
216
if err != nil {
234
217
logger.Error("invalid path in repo op", "eventKind", op.Action, "path", op.Path)
235
218
return nil
+20
-2
cmd/goat/identity.go
···
15
15
Name: "resolve",
16
16
Usage: "lookup identity metadata",
17
17
ArgsUsage: `<at-identifier>`,
18
-
Flags: []cli.Flag{},
19
-
Action: runResolve,
18
+
Flags: []cli.Flag{
19
+
&cli.BoolFlag{
20
+
Name: "did",
21
+
Usage: "just resolve to DID",
22
+
},
23
+
},
24
+
Action: runResolve,
20
25
}
21
26
22
27
func runResolve(cctx *cli.Context) error {
···
33
38
dir := identity.BaseDirectory{}
34
39
var doc *identity.DIDDocument
35
40
41
+
if cctx.Bool("did") {
42
+
if atid.IsDID() {
43
+
}
44
+
}
45
+
36
46
if atid.IsDID() {
37
47
did, err := atid.AsDID()
38
48
if err != nil {
39
49
return err
40
50
}
51
+
if cctx.Bool("did") {
52
+
fmt.Println(did)
53
+
return nil
54
+
}
41
55
doc, err = dir.ResolveDID(ctx, did)
42
56
if err != nil {
43
57
return err
···
50
64
did, err := dir.ResolveHandle(ctx, handle)
51
65
if err != nil {
52
66
return err
67
+
}
68
+
if cctx.Bool("did") {
69
+
fmt.Println(did)
70
+
return nil
53
71
}
54
72
doc, err = dir.ResolveDID(ctx, did)
55
73
if err != nil {
+1
-1
cmd/goat/identitygetRecommendedDidCredentials.go
api/agnostic/identitygetRecommendedDidCredentials.go
+1
-1
cmd/goat/identitysignPlcOperation.go
api/agnostic/identitysignPlcOperation.go
+1
-1
cmd/goat/identitysubmitPlcOperation.go
api/agnostic/identitysubmitPlcOperation.go
+2
-1
cmd/goat/net.go
···
5
5
"fmt"
6
6
"log/slog"
7
7
8
+
"github.com/bluesky-social/indigo/api/agnostic"
8
9
"github.com/bluesky-social/indigo/atproto/data"
9
10
"github.com/bluesky-social/indigo/atproto/identity"
10
11
"github.com/bluesky-social/indigo/atproto/syntax"
···
17
18
xrpcc := xrpc.Client{
18
19
Host: ident.PDSEndpoint(),
19
20
}
20
-
resp, err := RepoGetRecord(ctx, &xrpcc, "", aturi.Collection().String(), ident.DID.String(), aturi.RecordKey().String())
21
+
resp, err := agnostic.RepoGetRecord(ctx, &xrpcc, "", aturi.Collection().String(), ident.DID.String(), aturi.RecordKey().String())
21
22
if err != nil {
22
23
return nil, err
23
24
}
+5
-4
cmd/goat/record.go
···
6
6
"fmt"
7
7
"os"
8
8
9
+
"github.com/bluesky-social/indigo/api/agnostic"
9
10
comatproto "github.com/bluesky-social/indigo/api/atproto"
10
11
"github.com/bluesky-social/indigo/atproto/data"
11
12
"github.com/bluesky-social/indigo/atproto/identity"
···
179
180
cursor := ""
180
181
for {
181
182
// collection string, cursor string, limit int64, repo string, reverse bool, rkeyEnd string, rkeyStart string
182
-
resp, err := RepoListRecords(ctx, &xrpcc, nsid, cursor, 100, ident.DID.String(), false, "", "")
183
+
resp, err := agnostic.RepoListRecords(ctx, &xrpcc, nsid, cursor, 100, ident.DID.String(), false, "", "")
183
184
if err != nil {
184
185
return err
185
186
}
···
246
247
}
247
248
validate := !cctx.Bool("no-validate")
248
249
249
-
resp, err := RepoCreateRecord(ctx, xrpcc, &RepoCreateRecord_Input{
250
+
resp, err := agnostic.RepoCreateRecord(ctx, xrpcc, &agnostic.RepoCreateRecord_Input{
250
251
Collection: nsid,
251
252
Repo: xrpcc.Auth.Did,
252
253
Record: recordVal,
···
293
294
rkey := cctx.String("rkey")
294
295
295
296
// NOTE: need to fetch existing record CID to perform swap. this is optional in theory, but golang can't deal with "optional" and "nullable", so we always need to set this (?)
296
-
existing, err := RepoGetRecord(ctx, xrpcc, "", nsid, xrpcc.Auth.Did, rkey)
297
+
existing, err := agnostic.RepoGetRecord(ctx, xrpcc, "", nsid, xrpcc.Auth.Did, rkey)
297
298
if err != nil {
298
299
return err
299
300
}
···
305
306
306
307
validate := !cctx.Bool("no-validate")
307
308
308
-
resp, err := RepoPutRecord(ctx, xrpcc, &RepoPutRecord_Input{
309
+
resp, err := agnostic.RepoPutRecord(ctx, xrpcc, &agnostic.RepoPutRecord_Input{
309
310
Collection: nsid,
310
311
Repo: xrpcc.Auth.Did,
311
312
Record: recordVal,
+165
-5
cmd/goat/repo.go
···
4
4
"bytes"
5
5
"context"
6
6
"encoding/json"
7
+
"errors"
7
8
"fmt"
8
9
"os"
9
10
"path/filepath"
11
+
"strings"
10
12
"time"
11
13
12
14
comatproto "github.com/bluesky-social/indigo/api/atproto"
13
15
"github.com/bluesky-social/indigo/atproto/data"
14
16
"github.com/bluesky-social/indigo/atproto/syntax"
17
+
"github.com/bluesky-social/indigo/mst"
15
18
"github.com/bluesky-social/indigo/repo"
19
+
"github.com/bluesky-social/indigo/util"
16
20
"github.com/bluesky-social/indigo/xrpc"
17
21
18
22
"github.com/ipfs/go-cid"
23
+
cbor "github.com/ipfs/go-ipld-cbor"
24
+
ipld "github.com/ipfs/go-ipld-format"
19
25
"github.com/urfave/cli/v2"
26
+
"github.com/xlab/treeprint"
20
27
)
21
28
22
29
var cmdRepo = &cli.Command{
···
59
66
Action: runRepoInspect,
60
67
},
61
68
&cli.Command{
69
+
Name: "mst",
70
+
Usage: "show repo MST structure",
71
+
ArgsUsage: `<car-file>`,
72
+
Flags: []cli.Flag{
73
+
&cli.BoolFlag{
74
+
Name: "full-cid",
75
+
Aliases: []string{"f"},
76
+
Usage: "display full CIDs",
77
+
},
78
+
&cli.StringFlag{
79
+
Name: "root",
80
+
Aliases: []string{"r"},
81
+
Usage: "CID of root block",
82
+
},
83
+
},
84
+
Action: runRepoMST,
85
+
},
86
+
&cli.Command{
62
87
Name: "unpack",
63
88
Usage: "extract records from CAR file as directory of JSON files",
64
89
ArgsUsage: `<car-file>`,
···
93
118
return fmt.Errorf("no PDS endpoint for identity")
94
119
}
95
120
121
+
// set longer timeout, for large CAR files
122
+
xrpcc.Client = util.RobustHTTPClient()
123
+
xrpcc.Client.Timeout = 600 * time.Second
124
+
96
125
carPath := cctx.String("output")
97
126
if carPath == "" {
98
127
		// NOTE: having the rev in the path might be nice
99
128
now := time.Now().Format("20060102150405")
100
129
carPath = fmt.Sprintf("%s.%s.car", username, now)
101
130
}
102
-
// NOTE: there is a race condition, but nice to give a friendly error earlier before downloading
103
-
if _, err := os.Stat(carPath); err == nil {
104
-
return fmt.Errorf("file already exists: %s", carPath)
131
+
output, err := getFileOrStdout(carPath)
132
+
if err != nil {
133
+
if errors.Is(err, os.ErrExist) {
134
+
return fmt.Errorf("file already exists: %s", carPath)
135
+
}
136
+
return err
105
137
}
106
-
fmt.Printf("downloading from %s to: %s\n", xrpcc.Host, carPath)
138
+
defer output.Close()
139
+
if carPath != stdIOPath {
140
+
fmt.Printf("downloading from %s to: %s\n", xrpcc.Host, carPath)
141
+
}
107
142
repoBytes, err := comatproto.SyncGetRepo(ctx, &xrpcc, ident.DID.String(), "")
108
143
if err != nil {
109
144
return err
110
145
}
111
-
return os.WriteFile(carPath, repoBytes, 0666)
146
+
if _, err := output.Write(repoBytes); err != nil {
147
+
return err
148
+
}
149
+
return nil
112
150
}
113
151
114
152
func runRepoImport(cctx *cli.Context) error {
···
192
230
// TODO: Signature?
193
231
194
232
return nil
233
+
}
234
+
235
+
func runRepoMST(cctx *cli.Context) error {
236
+
ctx := context.Background()
237
+
opts := repoMSTOptions{
238
+
carPath: cctx.Args().First(),
239
+
fullCID: cctx.Bool("full-cid"),
240
+
root: cctx.String("root"),
241
+
}
242
+
// read from file or stdin
243
+
if opts.carPath == "" {
244
+
return fmt.Errorf("need to provide path to CAR file as argument")
245
+
}
246
+
inputCAR, err := getFileOrStdin(opts.carPath)
247
+
if err != nil {
248
+
return err
249
+
}
250
+
// read repository tree in to memory
251
+
r, err := repo.ReadRepoFromCar(ctx, inputCAR)
252
+
if err != nil {
253
+
return err
254
+
}
255
+
cst := util.CborStore(r.Blockstore())
256
+
// determine which root cid to use, defaulting to repo data root
257
+
rootCID := r.DataCid()
258
+
if opts.root != "" {
259
+
optsRootCID, err := cid.Decode(opts.root)
260
+
if err != nil {
261
+
return err
262
+
}
263
+
rootCID = optsRootCID
264
+
}
265
+
// start walking mst
266
+
exists, err := nodeExists(ctx, cst, rootCID)
267
+
if err != nil {
268
+
return err
269
+
}
270
+
tree := treeprint.NewWithRoot(displayCID(&rootCID, exists, opts))
271
+
if exists {
272
+
if err := walkMST(ctx, cst, rootCID, tree, opts); err != nil {
273
+
return err
274
+
}
275
+
}
276
+
// print tree
277
+
fmt.Println(tree.String())
278
+
return nil
279
+
}
280
+
281
+
func walkMST(ctx context.Context, cst *cbor.BasicIpldStore, cid cid.Cid, tree treeprint.Tree, opts repoMSTOptions) error {
282
+
var node mst.NodeData
283
+
if err := cst.Get(ctx, cid, &node); err != nil {
284
+
return err
285
+
}
286
+
if node.Left != nil {
287
+
exists, err := nodeExists(ctx, cst, *node.Left)
288
+
if err != nil {
289
+
return err
290
+
}
291
+
subtree := tree.AddBranch(displayCID(node.Left, exists, opts))
292
+
if exists {
293
+
if err := walkMST(ctx, cst, *node.Left, subtree, opts); err != nil {
294
+
return err
295
+
}
296
+
}
297
+
}
298
+
for _, entry := range node.Entries {
299
+
exists, err := nodeExists(ctx, cst, entry.Val)
300
+
if err != nil {
301
+
return err
302
+
}
303
+
tree.AddNode(displayEntryVal(&entry, exists, opts))
304
+
if entry.Tree != nil {
305
+
exists, err := nodeExists(ctx, cst, *entry.Tree)
306
+
if err != nil {
307
+
return err
308
+
}
309
+
subtree := tree.AddBranch(displayCID(entry.Tree, exists, opts))
310
+
if exists {
311
+
if err := walkMST(ctx, cst, *entry.Tree, subtree, opts); err != nil {
312
+
return err
313
+
}
314
+
}
315
+
}
316
+
}
317
+
return nil
318
+
}
319
+
320
+
func displayEntryVal(entry *mst.TreeEntry, exists bool, opts repoMSTOptions) string {
321
+
key := string(entry.KeySuffix)
322
+
divider := " "
323
+
if opts.fullCID {
324
+
divider = "\n"
325
+
}
326
+
return strings.Repeat("∙", int(entry.PrefixLen)) + key + divider + displayCID(&entry.Val, exists, opts)
327
+
}
328
+
329
+
func displayCID(cid *cid.Cid, exists bool, opts repoMSTOptions) string {
330
+
cidDisplay := cid.String()
331
+
if !opts.fullCID {
332
+
cidDisplay = "…" + string(cidDisplay[len(cidDisplay)-7:])
333
+
}
334
+
connector := "─◉"
335
+
if !exists {
336
+
connector = "─◌"
337
+
}
338
+
return "[" + cidDisplay + "]" + connector
339
+
}
340
+
341
+
type repoMSTOptions struct {
342
+
carPath string
343
+
fullCID bool
344
+
root string
345
+
}
346
+
347
+
func nodeExists(ctx context.Context, cst *cbor.BasicIpldStore, cid cid.Cid) (bool, error) {
348
+
if _, err := cst.Blocks.Get(ctx, cid); err != nil {
349
+
if errors.Is(err, ipld.ErrNotFound{}) {
350
+
return false, nil
351
+
}
352
+
return false, err
353
+
}
354
+
return true, nil
195
355
}
196
356
197
357
func runRepoUnpack(cctx *cli.Context) error {
+1
-1
cmd/goat/repocreateRecord.go
api/agnostic/repocreateRecord.go
+1
-1
cmd/goat/repogetRecord.go
api/agnostic/repogetRecord.go
+1
-1
cmd/goat/repolistRecords.go
api/agnostic/repolistRecords.go
+1
-1
cmd/goat/repoputRecord.go
api/agnostic/repoputRecord.go
+26
cmd/goat/util.go
···
2
2
3
3
import (
4
4
"context"
5
+
"io"
6
+
"os"
5
7
6
8
"github.com/bluesky-social/indigo/atproto/identity"
7
9
"github.com/bluesky-social/indigo/atproto/syntax"
···
16
18
dir := identity.DefaultDirectory()
17
19
return dir.Lookup(ctx, *id)
18
20
}
21
+
22
+
const stdIOPath = "-"
23
+
24
+
func getFileOrStdin(path string) (io.Reader, error) {
25
+
if path == stdIOPath {
26
+
return os.Stdin, nil
27
+
}
28
+
file, err := os.Open(path)
29
+
if err != nil {
30
+
return nil, err
31
+
}
32
+
return file, nil
33
+
}
34
+
35
+
func getFileOrStdout(path string) (io.WriteCloser, error) {
36
+
if path == stdIOPath {
37
+
return os.Stdout, nil
38
+
}
39
+
file, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0666)
40
+
if err != nil {
41
+
return nil, err
42
+
}
43
+
return file, nil
44
+
}
+2
cmd/gosky/admin.go
···
403
403
false, // hasComment bool
404
404
false, // includeAllUserRecords bool
405
405
100, // limit int64
406
+
nil, // policies []string
406
407
nil, // removedLabels []string
407
408
nil, // removedTags []string
408
409
nil, // reportTypes []string
···
717
718
false, // hasComment bool
718
719
false, // includeAllUserRecords bool
719
720
100, // limit int64
721
+
nil, // policies []string
720
722
nil, // removedLabels []string
721
723
nil, // removedTags []string
722
724
nil, // reportTypes []string
+1
-1
cmd/gosky/bsky.go
+2
-2
cmd/gosky/car.go
···
64
64
if topDir == "" {
65
65
topDir = did.String()
66
66
}
67
-
log.Infof("writing output to: %s", topDir)
67
+
log.Info("writing output", "topDir", topDir)
68
68
69
69
commitPath := topDir + "/_commit"
70
70
os.MkdirAll(filepath.Dir(commitPath), os.ModePerm)
···
90
90
if err != nil {
91
91
return err
92
92
}
93
-
log.Debugf("processing record: %s", k)
93
+
log.Debug("processing record", "rec", k)
94
94
95
95
// TODO: check if path is safe more carefully
96
96
recPath := topDir + "/" + k
+15
-9
cmd/gosky/debug.go
···
106
106
}
107
107
108
108
seqScheduler := sequential.NewScheduler("debug-inspect-event", rsc.EventHandler)
109
-
err = events.HandleRepoStream(ctx, con, seqScheduler)
109
+
err = events.HandleRepoStream(ctx, con, seqScheduler, nil)
110
110
if err != errFoundIt {
111
111
return err
112
112
}
···
284
284
},
285
285
}
286
286
seqScheduler := sequential.NewScheduler("debug-stream", rsc.EventHandler)
287
-
err = events.HandleRepoStream(ctx, con, seqScheduler)
287
+
err = events.HandleRepoStream(ctx, con, seqScheduler, nil)
288
288
if err != nil {
289
289
return err
290
290
}
···
390
390
go func(i int, url string) {
391
391
con, _, err := d.Dial(url, http.Header{})
392
392
if err != nil {
393
-
log.Fatalf("Dial failure on url%d: %s", i+1, err)
393
+
log.Error("Dial failure", "i", i, "url", url, "err", err)
394
+
os.Exit(1)
394
395
}
395
396
396
397
ctx := context.TODO()
···
405
406
},
406
407
}
407
408
seqScheduler := sequential.NewScheduler(fmt.Sprintf("debug-stream-%d", i+1), rsc.EventHandler)
408
-
if err := events.HandleRepoStream(ctx, con, seqScheduler); err != nil {
409
-
log.Fatalf("HandleRepoStream failure on url%d: %s", i+1, err)
409
+
if err := events.HandleRepoStream(ctx, con, seqScheduler, nil); err != nil {
410
+
log.Error("HandleRepoStream failure", "i", i, "url", url, "err", err)
411
+
os.Exit(1)
410
412
}
411
413
}(i, url)
412
414
}
···
876
878
logger := log.With("host", cctx.String("host-1"))
877
879
repo1bytes, err := comatproto.SyncGetRepo(ctx, &xrpc1, did.String(), "")
878
880
if err != nil {
879
-
logger.Fatalf("getting repo: %s", err)
881
+
logger.Error("getting repo", "err", err)
882
+
os.Exit(1)
880
883
return
881
884
}
882
885
883
886
rep1, err = repo.ReadRepoFromCar(ctx, bytes.NewReader(repo1bytes))
884
887
if err != nil {
885
-
logger.Fatalf("reading repo: %s", err)
888
+
logger.Error("reading repo", "err", err)
889
+
os.Exit(1)
886
890
return
887
891
}
888
892
}()
···
893
897
logger := log.With("host", cctx.String("host-2"))
894
898
repo2bytes, err := comatproto.SyncGetRepo(ctx, &xrpc2, did.String(), "")
895
899
if err != nil {
896
-
logger.Fatalf("getting repo: %s", err)
900
+
logger.Error("getting repo", "err", err)
901
+
os.Exit(1)
897
902
return
898
903
}
899
904
900
905
rep2, err = repo.ReadRepoFromCar(ctx, bytes.NewReader(repo2bytes))
901
906
if err != nil {
902
-
logger.Fatalf("reading repo: %s", err)
907
+
logger.Error("reading repo", "err", err)
908
+
os.Exit(1)
903
909
return
904
910
}
905
911
}()
+25
-4
cmd/gosky/main.go
···
7
7
"encoding/json"
8
8
"fmt"
9
9
"io"
10
+
"log/slog"
10
11
"net/http"
11
12
"os"
12
13
"os/signal"
···
18
19
"github.com/bluesky-social/indigo/api/atproto"
19
20
comatproto "github.com/bluesky-social/indigo/api/atproto"
20
21
"github.com/bluesky-social/indigo/api/bsky"
22
+
"github.com/bluesky-social/indigo/atproto/identity"
21
23
"github.com/bluesky-social/indigo/atproto/syntax"
22
24
"github.com/bluesky-social/indigo/events"
23
25
"github.com/bluesky-social/indigo/events/schedulers/sequential"
···
38
40
_ "github.com/joho/godotenv/autoload"
39
41
40
42
"github.com/carlmjohnson/versioninfo"
41
-
logging "github.com/ipfs/go-log"
42
43
"github.com/polydawn/refmt/cbor"
43
44
rejson "github.com/polydawn/refmt/json"
44
45
"github.com/polydawn/refmt/shared"
45
46
cli "github.com/urfave/cli/v2"
46
47
)
47
48
48
-
var log = logging.Logger("gosky")
49
+
var log = slog.Default().With("system", "gosky")
49
50
50
51
func main() {
51
52
run(os.Args)
···
79
80
EnvVars: []string{"ATP_PLC_HOST"},
80
81
},
81
82
}
83
+
84
+
_, err := cliutil.SetupSlog(cliutil.LogOptions{})
85
+
if err != nil {
86
+
fmt.Fprintf(os.Stderr, "logging setup error: %s\n", err.Error())
87
+
os.Exit(1)
88
+
return
89
+
}
90
+
82
91
app.Commands = []*cli.Command{
83
92
accountCmd,
84
93
adminCmd,
···
338
347
},
339
348
}
340
349
seqScheduler := sequential.NewScheduler(con.RemoteAddr().String(), rsc.EventHandler)
341
-
return events.HandleRepoStream(ctx, con, seqScheduler)
350
+
return events.HandleRepoStream(ctx, con, seqScheduler, log)
342
351
},
343
352
}
344
353
···
462
471
return fmt.Errorf("unrecognized link")
463
472
}
464
473
474
+
atid, err := syntax.ParseAtIdentifier(did)
475
+
if err != nil {
476
+
return err
477
+
}
478
+
479
+
resp, err := identity.DefaultDirectory().Lookup(ctx, *atid)
480
+
if err != nil {
481
+
return err
482
+
}
483
+
484
+
xrpcc.Host = resp.PDSEndpoint()
485
+
465
486
out, err := comatproto.RepoGetRecord(ctx, xrpcc, "", collection, did, rkey)
466
487
if err != nil {
467
488
return err
···
490
511
491
512
rc, rec, err := rr.GetRecord(ctx, cctx.Args().First())
492
513
if err != nil {
493
-
return err
514
+
return fmt.Errorf("get record failed: %w", err)
494
515
}
495
516
496
517
if cctx.Bool("raw") {
+4
-4
cmd/gosky/streamdiff.go
···
58
58
},
59
59
}
60
60
seqScheduler := sequential.NewScheduler("streamA", rsc.EventHandler)
61
-
err = events.HandleRepoStream(ctx, cona, seqScheduler)
61
+
err = events.HandleRepoStream(ctx, cona, seqScheduler, log)
62
62
if err != nil {
63
-
log.Errorf("stream A failed: %s", err)
63
+
log.Error("stream A failed", "err", err)
64
64
}
65
65
}()
66
66
···
82
82
}
83
83
84
84
seqScheduler := sequential.NewScheduler("streamB", rsc.EventHandler)
85
-
err = events.HandleRepoStream(ctx, conb, seqScheduler)
85
+
err = events.HandleRepoStream(ctx, conb, seqScheduler, log)
86
86
if err != nil {
87
-
log.Errorf("stream B failed: %s", err)
87
+
log.Error("stream B failed", "err", err)
88
88
}
89
89
}()
90
90
+1
-1
cmd/gosky/sync.go
+2
-2
cmd/hepa/Dockerfile
···
3
3
# podman build -f ./cmd/hepa/Dockerfile -t hepa .
4
4
5
5
### Compile stage
6
-
FROM golang:1.22-alpine3.19 AS build-env
6
+
FROM golang:1.23-alpine3.20 AS build-env
7
7
RUN apk add --no-cache build-base make git
8
8
9
9
ADD . /dockerbuild
···
15
15
go build -tags timetzdata -o /hepa ./cmd/hepa
16
16
17
17
### Run stage
18
-
FROM alpine:3.19
18
+
FROM alpine:3.20
19
19
20
20
RUN apk add --no-cache --update dumb-init ca-certificates
21
21
ENTRYPOINT ["dumb-init", "--"]
+44
-20
cmd/hepa/main.go
···
149
149
Usage: "secret token for prescreen server",
150
150
EnvVars: []string{"HEPA_PRESCREEN_TOKEN"},
151
151
},
152
+
&cli.DurationFlag{
153
+
Name: "report-dupe-period",
154
+
Usage: "time period within which automod will not re-report an account for the same reasonType",
155
+
EnvVars: []string{"HEPA_REPORT_DUPE_PERIOD"},
156
+
Value: 1 * 24 * time.Hour,
157
+
},
158
+
&cli.IntFlag{
159
+
Name: "quota-mod-report-day",
160
+
Usage: "number of reports automod can file per day, for all subjects and types combined (circuit breaker)",
161
+
EnvVars: []string{"HEPA_QUOTA_MOD_REPORT_DAY"},
162
+
Value: 10000,
163
+
},
164
+
&cli.IntFlag{
165
+
Name: "quota-mod-takedown-day",
166
+
Usage: "number of takedowns automod can action per day, for all subjects combined (circuit breaker)",
167
+
EnvVars: []string{"HEPA_QUOTA_MOD_TAKEDOWN_DAY"},
168
+
Value: 200,
169
+
},
170
+
&cli.IntFlag{
171
+
Name: "quota-mod-action-day",
172
+
Usage: "number of misc actions automod can do per day, for all subjects combined (circuit breaker)",
173
+
EnvVars: []string{"HEPA_QUOTA_MOD_ACTION_DAY"},
174
+
Value: 2000,
175
+
},
152
176
}
153
177
154
178
app.Commands = []*cli.Command{
···
237
261
dir,
238
262
Config{
239
263
Logger: logger,
240
-
RelayHost: cctx.String("atp-relay-host"), // DEPRECATED
241
264
BskyHost: cctx.String("atp-bsky-host"),
242
265
OzoneHost: cctx.String("atp-ozone-host"),
243
266
OzoneDID: cctx.String("ozone-did"),
···
252
275
AbyssPassword: cctx.String("abyss-password"),
253
276
RatelimitBypass: cctx.String("ratelimit-bypass"),
254
277
RulesetName: cctx.String("ruleset"),
255
-
FirehoseParallelism: cctx.Int("firehose-parallelism"), // DEPRECATED
256
278
PreScreenHost: cctx.String("prescreen-host"),
257
279
PreScreenToken: cctx.String("prescreen-token"),
280
+
ReportDupePeriod: cctx.Duration("report-dupe-period"),
281
+
QuotaModReportDay: cctx.Int("quota-mod-report-day"),
282
+
QuotaModTakedownDay: cctx.Int("quota-mod-takedown-day"),
283
+
QuotaModActionDay: cctx.Int("quota-mod-action-day"),
258
284
},
259
285
)
260
286
if err != nil {
···
332
358
return NewServer(
333
359
dir,
334
360
Config{
335
-
Logger: logger,
336
-
RelayHost: cctx.String("atp-relay-host"),
337
-
BskyHost: cctx.String("atp-bsky-host"),
338
-
OzoneHost: cctx.String("atp-ozone-host"),
339
-
OzoneDID: cctx.String("ozone-did"),
340
-
OzoneAdminToken: cctx.String("ozone-admin-token"),
341
-
PDSHost: cctx.String("atp-pds-host"),
342
-
PDSAdminToken: cctx.String("pds-admin-token"),
343
-
SetsFileJSON: cctx.String("sets-json-path"),
344
-
RedisURL: cctx.String("redis-url"),
345
-
HiveAPIToken: cctx.String("hiveai-api-token"),
346
-
AbyssHost: cctx.String("abyss-host"),
347
-
AbyssPassword: cctx.String("abyss-password"),
348
-
RatelimitBypass: cctx.String("ratelimit-bypass"),
349
-
RulesetName: cctx.String("ruleset"),
350
-
FirehoseParallelism: cctx.Int("firehose-parallelism"),
351
-
PreScreenHost: cctx.String("prescreen-host"),
352
-
PreScreenToken: cctx.String("prescreen-token"),
361
+
Logger: logger,
362
+
BskyHost: cctx.String("atp-bsky-host"),
363
+
OzoneHost: cctx.String("atp-ozone-host"),
364
+
OzoneDID: cctx.String("ozone-did"),
365
+
OzoneAdminToken: cctx.String("ozone-admin-token"),
366
+
PDSHost: cctx.String("atp-pds-host"),
367
+
PDSAdminToken: cctx.String("pds-admin-token"),
368
+
SetsFileJSON: cctx.String("sets-json-path"),
369
+
RedisURL: cctx.String("redis-url"),
370
+
HiveAPIToken: cctx.String("hiveai-api-token"),
371
+
AbyssHost: cctx.String("abyss-host"),
372
+
AbyssPassword: cctx.String("abyss-password"),
373
+
RatelimitBypass: cctx.String("ratelimit-bypass"),
374
+
RulesetName: cctx.String("ruleset"),
375
+
PreScreenHost: cctx.String("prescreen-host"),
376
+
PreScreenToken: cctx.String("prescreen-token"),
353
377
},
354
378
)
355
379
}
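The four new hepa flags map one-to-one onto the automod engine's circuit-breaker configuration (see the `engine.EngineConfig` literal in `cmd/hepa/server.go` below). A small sketch using the flag defaults, assuming the field types implied by the `cctx.Duration`/`cctx.Int` accessors:

```go
package main

import (
	"fmt"
	"time"

	"github.com/bluesky-social/indigo/automod/engine"
)

func main() {
	// Defaults taken from the new hepa CLI flags; each field acts as a
	// circuit breaker limiting how much automod can do per day.
	cfg := engine.EngineConfig{
		ReportDupePeriod:    1 * 24 * time.Hour, // HEPA_REPORT_DUPE_PERIOD
		QuotaModReportDay:   10000,              // HEPA_QUOTA_MOD_REPORT_DAY
		QuotaModTakedownDay: 200,                // HEPA_QUOTA_MOD_TAKEDOWN_DAY
		QuotaModActionDay:   2000,               // HEPA_QUOTA_MOD_ACTION_DAY
	}
	fmt.Printf("%+v\n", cfg)
}
```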
+16
-17
cmd/hepa/server.go
···
6
6
"log/slog"
7
7
"net/http"
8
8
"os"
9
-
"strings"
10
9
"time"
11
10
12
11
"github.com/bluesky-social/indigo/atproto/identity"
···
14
13
"github.com/bluesky-social/indigo/automod"
15
14
"github.com/bluesky-social/indigo/automod/cachestore"
16
15
"github.com/bluesky-social/indigo/automod/countstore"
16
+
"github.com/bluesky-social/indigo/automod/engine"
17
17
"github.com/bluesky-social/indigo/automod/flagstore"
18
18
"github.com/bluesky-social/indigo/automod/rules"
19
19
"github.com/bluesky-social/indigo/automod/setstore"
···
29
29
Engine *automod.Engine
30
30
RedisClient *redis.Client
31
31
32
-
relayHost string // DEPRECATED
33
-
firehoseParallelism int // DEPRECATED
34
-
logger *slog.Logger
32
+
logger *slog.Logger
35
33
}
36
34
37
35
type Config struct {
38
36
Logger *slog.Logger
39
-
RelayHost string // DEPRECATED
40
37
BskyHost string
41
38
OzoneHost string
42
39
OzoneDID string
···
51
48
AbyssPassword string
52
49
RulesetName string
53
50
RatelimitBypass string
54
-
FirehoseParallelism int // DEPRECATED
55
51
PreScreenHost string
56
52
PreScreenToken string
53
+
ReportDupePeriod time.Duration
54
+
QuotaModReportDay int
55
+
QuotaModTakedownDay int
56
+
QuotaModActionDay int
57
57
}
58
58
59
59
func NewServer(dir identity.Directory, config Config) (*Server, error) {
···
62
62
logger = slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
63
63
Level: slog.LevelInfo,
64
64
}))
65
-
}
66
-
67
-
relayws := config.RelayHost
68
-
if !strings.HasPrefix(relayws, "ws") {
69
-
return nil, fmt.Errorf("specified relay host must include 'ws://' or 'wss://'")
70
65
}
71
66
72
67
var ozoneClient *xrpc.Client
···
206
201
bskyClient.Headers["x-ratelimit-bypass"] = config.RatelimitBypass
207
202
}
208
203
blobClient := util.RobustHTTPClient()
209
-
engine := automod.Engine{
204
+
eng := automod.Engine{
210
205
Logger: logger,
211
206
Directory: dir,
212
207
Counters: counters,
···
219
214
OzoneClient: ozoneClient,
220
215
AdminClient: adminClient,
221
216
BlobClient: blobClient,
217
+
Config: engine.EngineConfig{
218
+
ReportDupePeriod: config.ReportDupePeriod,
219
+
QuotaModReportDay: config.QuotaModReportDay,
220
+
QuotaModTakedownDay: config.QuotaModTakedownDay,
221
+
QuotaModActionDay: config.QuotaModActionDay,
222
+
},
222
223
}
223
224
224
225
s := &Server{
225
-
relayHost: config.RelayHost,
226
-
firehoseParallelism: config.FirehoseParallelism,
227
-
logger: logger,
228
-
Engine: &engine,
229
-
RedisClient: rdb,
226
+
logger: logger,
227
+
Engine: &eng,
228
+
RedisClient: rdb,
230
229
}
231
230
232
231
return s, nil
+1
-4
cmd/laputa/main.go
···
14
14
_ "go.uber.org/automaxprocs"
15
15
16
16
"github.com/carlmjohnson/versioninfo"
17
-
logging "github.com/ipfs/go-log"
18
17
"github.com/urfave/cli/v2"
19
18
"go.opentelemetry.io/otel"
20
19
"go.opentelemetry.io/otel/attribute"
···
24
23
semconv "go.opentelemetry.io/otel/semconv/v1.4.0"
25
24
"gorm.io/plugin/opentelemetry/tracing"
26
25
)
27
-
28
-
var log = logging.Logger("laputa")
29
26
30
27
func main() {
31
28
run(os.Args)
···
158
155
}
159
156
}
160
157
161
-
cstore, err := carstore.NewCarStore(csdb, csdir)
158
+
cstore, err := carstore.NewCarStore(csdb, []string{csdir})
162
159
if err != nil {
163
160
return err
164
161
}
+2
-2
cmd/netsync/main.go
···
345
345
Handler: mux,
346
346
}
347
347
348
+
state.wg.Add(1)
348
349
go func() {
349
-
state.wg.Add(1)
350
350
defer state.wg.Done()
351
351
if err := metricsServer.ListenAndServe(); err != http.ErrServerClosed {
352
352
logger.Error("failed to start metrics server", "err", err)
···
368
368
}
369
369
370
370
// Check for empty queue
371
+
state.wg.Add(1)
371
372
go func() {
372
-
state.wg.Add(1)
373
373
defer state.wg.Done()
374
374
t := time.NewTicker(30 * time.Second)
375
375
for {
+2
-2
cmd/palomar/Dockerfile
···
3
3
# podman build -f ./cmd/palomar/Dockerfile -t palomar .
4
4
5
5
### Compile stage
6
-
FROM golang:1.22-alpine3.19 AS build-env
6
+
FROM golang:1.23-alpine3.20 AS build-env
7
7
RUN apk add --no-cache build-base make git
8
8
9
9
ADD . /dockerbuild
···
15
15
go build -tags timetzdata -o /palomar ./cmd/palomar
16
16
17
17
### Run stage
18
-
FROM alpine:3.19
18
+
FROM alpine:3.20
19
19
20
20
RUN apk add --no-cache --update dumb-init ca-certificates
21
21
ENTRYPOINT ["dumb-init", "--"]
+43
cmd/rainbow/Dockerfile
···
1
+
FROM golang:1.23-bullseye AS build-env
2
+
3
+
ENV DEBIAN_FRONTEND=noninteractive
4
+
ENV TZ=Etc/UTC
5
+
ENV GODEBUG="netdns=go"
6
+
ENV GOOS="linux"
7
+
ENV GOARCH="amd64"
8
+
ENV CGO_ENABLED="1"
9
+
10
+
WORKDIR /usr/src/rainbow
11
+
12
+
COPY . .
13
+
14
+
RUN go mod download && \
15
+
go mod verify
16
+
17
+
RUN go build \
18
+
-v \
19
+
-trimpath \
20
+
-tags timetzdata \
21
+
-o /rainbow-bin \
22
+
./cmd/rainbow
23
+
24
+
FROM debian:bullseye-slim
25
+
26
+
ENV DEBIAN_FRONTEND="noninteractive"
27
+
ENV TZ=Etc/UTC
28
+
ENV GODEBUG="netdns=go"
29
+
30
+
RUN apt-get update && apt-get install --yes \
31
+
dumb-init \
32
+
ca-certificates \
33
+
runit
34
+
35
+
WORKDIR /rainbow
36
+
COPY --from=build-env /rainbow-bin /usr/bin/rainbow
37
+
38
+
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
39
+
CMD ["/usr/bin/rainbow"]
40
+
41
+
LABEL org.opencontainers.image.source=https://github.com/bluesky-social/indigo
42
+
LABEL org.opencontainers.image.description="rainbow atproto firehose fanout service"
43
+
LABEL org.opencontainers.image.licenses=MIT
+32
cmd/rainbow/README.md
···
1
+
2
+
`rainbow`: atproto Firehose Fanout Service
3
+
==========================================
4
+
5
+
This is an atproto service which consumes from a firehose (eg, from a relay or PDS) and fans out events to many subscribers.
6
+
7
+
Features and design points:
8
+
9
+
- retains "backfill window" on local disk (using [pebble](https://github.com/cockroachdb/pebble))
10
+
- serves the `com.atproto.sync.subscribeRepos` endpoint (WebSocket)
11
+
- retains upstream firehose "sequence numbers"
12
+
- does not validate events (signatures, repo tree, hashes, etc), just passes through
13
+
- does not archive or mirror individual records or entire repositories (or implement related API endpoints)
14
+
- disk I/O intensive: fast NVMe disks are recommended, and RAM is helpful for caching
15
+
- single golang binary for easy deployment
16
+
- observability: logging, prometheus metrics, OTEL traces
17
+
18
+
## Running
19
+
20
+
This is a simple, single-binary Go program. You can also build and run it as a docker container (see `./Dockerfile`).
21
+
22
+
From the top level of this repo, you can build:
23
+
24
+
```shell
25
+
go build -o rainbow-bin ./cmd/rainbow
26
+
```
27
+
28
+
or just run it, and see configuration options:
29
+
30
+
```shell
31
+
go run ./cmd/rainbow --help
32
+
```
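For orientation, here is a rough sketch of a downstream consumer that dials rainbow's `com.atproto.sync.subscribeRepos` endpoint and handles commit events with this repo's `events` helpers. The host/port, the `RepoStreamCallbacks` wiring, and the nil logger passed to `HandleRepoStream` are illustrative assumptions, not part of this README:

```go
package main

import (
	"context"
	"log/slog"

	comatproto "github.com/bluesky-social/indigo/api/atproto"
	"github.com/bluesky-social/indigo/events"
	"github.com/bluesky-social/indigo/events/schedulers/sequential"
	"github.com/gorilla/websocket"
)

func main() {
	// Assumes a local rainbow instance on the default --api-listen port.
	uri := "ws://localhost:2480/xrpc/com.atproto.sync.subscribeRepos"
	con, _, err := websocket.DefaultDialer.Dial(uri, nil)
	if err != nil {
		slog.Error("dial failed", "err", err)
		return
	}

	rsc := &events.RepoStreamCallbacks{
		RepoCommit: func(evt *comatproto.SyncSubscribeRepos_Commit) error {
			slog.Info("commit", "repo", evt.Repo, "seq", evt.Seq)
			return nil
		},
	}
	sched := sequential.NewScheduler("rainbow-consumer", rsc.EventHandler)

	// nil logger falls back to the events package default.
	if err := events.HandleRepoStream(context.Background(), con, sched, nil); err != nil {
		slog.Error("stream ended", "err", err)
	}
}
```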
+218
cmd/rainbow/main.go
···
1
+
package main
2
+
3
+
import (
4
+
"context"
5
+
"log/slog"
6
+
_ "net/http/pprof"
7
+
"os"
8
+
"os/signal"
9
+
"syscall"
10
+
"time"
11
+
12
+
"github.com/bluesky-social/indigo/events"
13
+
"github.com/bluesky-social/indigo/splitter"
14
+
15
+
"github.com/carlmjohnson/versioninfo"
16
+
_ "github.com/joho/godotenv/autoload"
17
+
"github.com/urfave/cli/v2"
18
+
"go.opentelemetry.io/otel"
19
+
"go.opentelemetry.io/otel/attribute"
20
+
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
21
+
"go.opentelemetry.io/otel/sdk/resource"
22
+
tracesdk "go.opentelemetry.io/otel/sdk/trace"
23
+
semconv "go.opentelemetry.io/otel/semconv/v1.4.0"
24
+
_ "go.uber.org/automaxprocs"
25
+
)
26
+
27
+
var log = slog.Default().With("system", "rainbow")
28
+
29
+
func init() {
30
+
// control log level using, eg, GOLOG_LOG_LEVEL=debug
31
+
//logging.SetAllLoggers(logging.LevelDebug)
32
+
}
33
+
34
+
func main() {
35
+
run(os.Args)
36
+
}
37
+
38
+
func run(args []string) {
39
+
app := cli.App{
40
+
Name: "rainbow",
41
+
Usage: "atproto firehose fan-out daemon",
42
+
Version: versioninfo.Short(),
43
+
}
44
+
45
+
app.Flags = []cli.Flag{
46
+
// TODO: unimplemented, always assumes https:// and wss://
47
+
//&cli.BoolFlag{
48
+
// Name: "crawl-insecure-ws",
49
+
// Usage: "when connecting to PDS instances, use ws:// instead of wss://",
50
+
// EnvVars: []string{"RAINBOW_INSECURE_CRAWL"},
51
+
//},
52
+
&cli.StringFlag{
53
+
Name: "splitter-host",
54
+
Value: "bsky.network",
55
+
EnvVars: []string{"ATP_RELAY_HOST", "RAINBOW_RELAY_HOST"},
56
+
},
57
+
&cli.StringFlag{
58
+
Name: "persist-db",
59
+
Value: "./rainbow.db",
60
+
Usage: "path to persistence db",
61
+
EnvVars: []string{"RAINBOW_DB_PATH"},
62
+
},
63
+
&cli.StringFlag{
64
+
Name: "cursor-file",
65
+
Value: "./rainbow-cursor",
66
+
Usage: "write upstream cursor number to this file",
67
+
EnvVars: []string{"RAINBOW_CURSOR_PATH"},
68
+
},
69
+
&cli.StringFlag{
70
+
Name: "api-listen",
71
+
Value: ":2480",
72
+
EnvVars: []string{"RAINBOW_API_LISTEN"},
73
+
},
74
+
&cli.StringFlag{
75
+
Name: "metrics-listen",
76
+
Value: ":2481",
77
+
EnvVars: []string{"RAINBOW_METRICS_LISTEN", "SPLITTER_METRICS_LISTEN"},
78
+
},
79
+
&cli.Float64Flag{
80
+
Name: "persist-hours",
81
+
Value: 24 * 3,
82
+
EnvVars: []string{"RAINBOW_PERSIST_HOURS", "SPLITTER_PERSIST_HOURS"},
83
+
Usage: "hours to buffer (float, may be fractional)",
84
+
},
85
+
&cli.Int64Flag{
86
+
Name: "persist-bytes",
87
+
Value: 0,
88
+
Usage: "max bytes target for event cache, 0 to disable size target trimming",
89
+
EnvVars: []string{"RAINBOW_PERSIST_BYTES", "SPLITTER_PERSIST_BYTES"},
90
+
},
91
+
&cli.StringSliceFlag{
92
+
Name: "next-crawler",
93
+
Usage: "forward POST requestCrawl to this url, should be machine root url and not xrpc/requestCrawl, comma separated list",
94
+
EnvVars: []string{"RELAY_NEXT_CRAWLER"},
95
+
},
96
+
}
97
+
98
+
// TODO: slog.SetDefault and set module `var log *slog.Logger` based on flags and env
99
+
100
+
app.Action = Splitter
101
+
err := app.Run(os.Args)
102
+
if err != nil {
103
+
log.Error(err.Error())
104
+
os.Exit(1)
105
+
}
106
+
}
107
+
108
+
func Splitter(cctx *cli.Context) error {
109
+
// Trap SIGINT to trigger a shutdown.
110
+
signals := make(chan os.Signal, 1)
111
+
signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
112
+
113
+
// Enable OTLP HTTP exporter
114
+
// For relevant environment variables:
115
+
// https://pkg.go.dev/go.opentelemetry.io/otel/exporters/otlp/otlptrace#readme-environment-variables
116
+
// At a minimum, you need to set
117
+
// OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
118
+
if ep := os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT"); ep != "" {
119
+
log.Info("setting up trace exporter", "endpoint", ep)
120
+
ctx, cancel := context.WithCancel(context.Background())
121
+
defer cancel()
122
+
123
+
exp, err := otlptracehttp.New(ctx)
124
+
if err != nil {
125
+
log.Error("failed to create trace exporter", "error", err)
126
+
os.Exit(1)
127
+
}
128
+
defer func() {
129
+
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
130
+
defer cancel()
131
+
if err := exp.Shutdown(ctx); err != nil {
132
+
log.Error("failed to shutdown trace exporter", "error", err)
133
+
}
134
+
}()
135
+
136
+
tp := tracesdk.NewTracerProvider(
137
+
tracesdk.WithBatcher(exp),
138
+
tracesdk.WithResource(resource.NewWithAttributes(
139
+
semconv.SchemaURL,
140
+
semconv.ServiceNameKey.String("splitter"),
141
+
attribute.String("env", os.Getenv("ENVIRONMENT")), // DataDog
142
+
attribute.String("environment", os.Getenv("ENVIRONMENT")), // Others
143
+
attribute.Int64("ID", 1),
144
+
)),
145
+
)
146
+
otel.SetTracerProvider(tp)
147
+
}
148
+
149
+
persistPath := cctx.String("persist-db")
150
+
upstreamHost := cctx.String("splitter-host")
151
+
nextCrawlers := cctx.StringSlice("next-crawler")
152
+
153
+
var spl *splitter.Splitter
154
+
var err error
155
+
if persistPath != "" {
156
+
log.Info("building splitter with storage at", "path", persistPath)
157
+
ppopts := events.PebblePersistOptions{
158
+
DbPath: persistPath,
159
+
PersistDuration: time.Duration(float64(time.Hour) * cctx.Float64("persist-hours")),
160
+
GCPeriod: 5 * time.Minute,
161
+
MaxBytes: uint64(cctx.Int64("persist-bytes")),
162
+
}
163
+
conf := splitter.SplitterConfig{
164
+
UpstreamHost: upstreamHost,
165
+
CursorFile: cctx.String("cursor-file"),
166
+
PebbleOptions: &ppopts,
167
+
}
168
+
spl, err = splitter.NewSplitter(conf, nextCrawlers)
169
+
} else {
170
+
log.Info("building in-memory splitter")
171
+
conf := splitter.SplitterConfig{
172
+
UpstreamHost: upstreamHost,
173
+
CursorFile: cctx.String("cursor-file"),
174
+
}
175
+
spl, err = splitter.NewSplitter(conf, nextCrawlers)
176
+
}
177
+
if err != nil {
178
+
log.Error("failed to create splitter", "path", persistPath, "error", err)
179
+
os.Exit(1)
180
+
return err
181
+
}
182
+
183
+
// set up metrics endpoint
184
+
go func() {
185
+
if err := spl.StartMetrics(cctx.String("metrics-listen")); err != nil {
186
+
log.Error("failed to start metrics endpoint", "err", err)
187
+
os.Exit(1)
188
+
}
189
+
}()
190
+
191
+
runErr := make(chan error, 1)
192
+
193
+
go func() {
194
+
err := spl.Start(cctx.String("api-listen"))
195
+
runErr <- err
196
+
}()
197
+
198
+
log.Info("startup complete")
199
+
select {
200
+
case <-signals:
201
+
log.Info("received shutdown signal")
202
+
if err := spl.Shutdown(); err != nil {
203
+
log.Error("error during Splitter shutdown", "err", err)
204
+
}
205
+
case err := <-runErr:
206
+
if err != nil {
207
+
log.Error("error during Splitter startup", "err", err)
208
+
}
209
+
log.Info("shutting down")
210
+
if err := spl.Shutdown(); err != nil {
211
+
log.Error("error during Splitter shutdown", "err", err)
212
+
}
213
+
}
214
+
215
+
log.Info("shutdown complete")
216
+
217
+
return nil
218
+
}
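To make the flag mapping above concrete: `--persist-hours` is a float, so fractional windows are allowed, and the default of `24 * 3` yields a 72-hour Pebble retention window; `--persist-bytes=0` leaves `MaxBytes` at zero, which disables size-based trimming. A tiny worked check of the duration conversion used above:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Same conversion as PersistDuration above.
	fmt.Println(time.Duration(float64(time.Hour) * 0.5)) // 30m0s
	fmt.Println(time.Duration(float64(time.Hour) * 72))  // 72h0m0s
}
```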
+1
-1
cmd/sonar/Dockerfile
+5
-5
cmd/sonar/main.go
···
104
104
pool := sequential.NewScheduler(u.Host, s.HandleStreamEvent)
105
105
106
106
// Start a goroutine to manage the cursor file, saving the current cursor every 5 seconds.
107
+
wg.Add(1)
107
108
go func() {
108
-
wg.Add(1)
109
109
defer wg.Done()
110
110
ticker := time.NewTicker(5 * time.Second)
111
111
logger := logger.With("source", "cursor_file_manager")
···
130
130
}()
131
131
132
132
// Start a goroutine to manage the liveness checker, shutting down if no events are received for 15 seconds
133
+
wg.Add(1)
133
134
go func() {
134
-
wg.Add(1)
135
135
defer wg.Done()
136
136
ticker := time.NewTicker(15 * time.Second)
137
137
lastSeq := int64(0)
···
167
167
}
168
168
169
169
// Startup metrics server
170
+
wg.Add(1)
170
171
go func() {
171
-
wg.Add(1)
172
172
defer wg.Done()
173
173
logger = logger.With("source", "metrics_server")
174
174
···
194
194
}
195
195
defer c.Close()
196
196
197
+
wg.Add(1)
197
198
go func() {
198
-
wg.Add(1)
199
199
defer wg.Done()
200
-
err = events.HandleRepoStream(ctx, c, pool)
200
+
err = events.HandleRepoStream(ctx, c, pool, logger)
201
201
logger.Info("HandleRepoStream returned unexpectedly", "err", err)
202
202
cancel()
203
203
}()
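The recurring fix in this file (and in netsync above) moves `wg.Add(1)` out of the goroutine body: if the `Add` only runs once the goroutine is scheduled, a racing `wg.Wait()` can observe a zero counter and return before the work has even started. A minimal sketch of the corrected pattern:

```go
package main

import "sync"

func main() {
	var wg sync.WaitGroup

	// Register the goroutine before launching it so Wait cannot miss it.
	wg.Add(1)
	go func() {
		defer wg.Done()
		// ... work ...
	}()

	wg.Wait()
}
```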
-3
cmd/stress/main.go
···
26
26
_ "github.com/joho/godotenv/autoload"
27
27
28
28
"github.com/carlmjohnson/versioninfo"
29
-
logging "github.com/ipfs/go-log"
30
29
"github.com/ipld/go-car"
31
30
cli "github.com/urfave/cli/v2"
32
31
)
33
-
34
-
var log = logging.Logger("stress")
35
32
36
33
func main() {
37
34
run(os.Args)
+1
-1
cmd/supercollider/Dockerfile
+1
-1
cmd/supercollider/main.go
+6
did/metrics.go
···
9
9
Name: "multiresolver_resolved_dids_total",
10
10
Help: "Total number of DIDs resolved",
11
11
}, []string{"resolver"})
12
+
13
+
var mrResolveDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
14
+
Name: "indigo_multiresolver_resolve_duration_seconds",
15
+
Help: "A histogram of resolve latencies",
16
+
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
17
+
}, []string{"resolver"})
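For scale, `prometheus.ExponentialBuckets(0.001, 2, 15)` doubles from 1ms, so the largest bucket upper bound is 0.001 × 2^14 = 16.384s, covering both cached lookups and slow network resolutions. A quick check:

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	buckets := prometheus.ExponentialBuckets(0.001, 2, 15)
	fmt.Println(len(buckets), buckets[0], buckets[len(buckets)-1]) // 15 0.001 16.384
}
```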
+6
did/multi.go
···
3
3
import (
4
4
"context"
5
5
"fmt"
6
+
"time"
6
7
7
8
"github.com/whyrusleeping/go-did"
8
9
)
···
43
44
}
44
45
45
46
func (mr *MultiResolver) GetDocument(ctx context.Context, didstr string) (*did.Document, error) {
47
+
s := time.Now()
48
+
46
49
pdid, err := did.ParseDID(didstr)
47
50
if err != nil {
48
51
return nil, err
49
52
}
50
53
51
54
method := pdid.Protocol()
55
+
defer func() {
56
+
mrResolveDuration.WithLabelValues(method).Observe(time.Since(s).Seconds())
57
+
}()
52
58
53
59
res, ok := mr.handlers[method]
54
60
if !ok {
+18
-10
events/consumer.go
···
4
4
"context"
5
5
"fmt"
6
6
"io"
7
+
"log/slog"
7
8
"net"
8
9
"time"
9
10
···
108
109
return n, err
109
110
}
110
111
111
-
func HandleRepoStream(ctx context.Context, con *websocket.Conn, sched Scheduler) error {
112
+
// HandleRepoStream
113
+
// con is source of events
114
+
// sched gets AddWork for each event
115
+
// log may be nil for default logger
116
+
func HandleRepoStream(ctx context.Context, con *websocket.Conn, sched Scheduler, log *slog.Logger) error {
117
+
if log == nil {
118
+
log = slog.Default().With("system", "events")
119
+
}
112
120
ctx, cancel := context.WithCancel(ctx)
113
121
defer cancel()
114
122
defer sched.Shutdown()
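Callers of the new four-argument signature either pass nil to keep the previous default logging or hand in a scoped logger, as the sonar call site earlier in this diff does. A caller-side sketch (assumes `ctx`, `con`, and `sched` are already set up):

```go
// nil keeps the package-default logger:
err := events.HandleRepoStream(ctx, con, sched, nil)

// or scope the stream's logs explicitly:
err = events.HandleRepoStream(ctx, con, sched, slog.Default().With("system", "my-consumer"))
```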
···
124
132
select {
125
133
case <-t.C:
126
134
if err := con.WriteControl(websocket.PingMessage, []byte{}, time.Now().Add(time.Second*10)); err != nil {
127
-
log.Warnf("failed to ping: %s", err)
135
+
log.Warn("failed to ping", "err", err)
128
136
}
129
137
case <-ctx.Done():
130
138
con.Close()
···
145
153
146
154
con.SetPongHandler(func(_ string) error {
147
155
if err := con.SetReadDeadline(time.Now().Add(time.Minute)); err != nil {
148
-
log.Errorf("failed to set read deadline: %s", err)
156
+
log.Error("failed to set read deadline", "err", err)
149
157
}
150
158
151
159
return nil
···
194
202
}
195
203
196
204
if evt.Seq < lastSeq {
197
-
log.Errorf("Got events out of order from stream (seq = %d, prev = %d)", evt.Seq, lastSeq)
205
+
log.Error("Got events out of order from stream", "seq", evt.Seq, "prev", lastSeq)
198
206
}
199
207
200
208
lastSeq = evt.Seq
···
211
219
}
212
220
213
221
if evt.Seq < lastSeq {
214
-
log.Errorf("Got events out of order from stream (seq = %d, prev = %d)", evt.Seq, lastSeq)
222
+
log.Error("Got events out of order from stream", "seq", evt.Seq, "prev", lastSeq)
215
223
}
216
224
lastSeq = evt.Seq
217
225
···
227
235
}
228
236
229
237
if evt.Seq < lastSeq {
230
-
log.Errorf("Got events out of order from stream (seq = %d, prev = %d)", evt.Seq, lastSeq)
238
+
log.Error("Got events out of order from stream", "seq", evt.Seq, "prev", lastSeq)
231
239
}
232
240
lastSeq = evt.Seq
233
241
···
243
251
}
244
252
245
253
if evt.Seq < lastSeq {
246
-
log.Errorf("Got events out of order from stream (seq = %d, prev = %d)", evt.Seq, lastSeq)
254
+
log.Error("Got events out of order from stream", "seq", evt.Seq, "prev", lastSeq)
247
255
}
248
256
lastSeq = evt.Seq
249
257
···
271
279
}
272
280
273
281
if evt.Seq < lastSeq {
274
-
log.Errorf("Got events out of order from stream (seq = %d, prev = %d)", evt.Seq, lastSeq)
282
+
log.Error("Got events out of order from stream", "seq", evt.Seq, "prev", lastSeq)
275
283
}
276
284
lastSeq = evt.Seq
277
285
···
287
295
}
288
296
289
297
if evt.Seq < lastSeq {
290
-
log.Errorf("Got events out of order from stream (seq = %d, prev = %d)", evt.Seq, lastSeq)
298
+
log.Error("Got events out of order from stream", "seq", evt.Seq, "prev", lastSeq)
291
299
}
292
300
lastSeq = evt.Seq
293
301
···
303
311
}
304
312
305
313
if evt.Seq < lastSeq {
306
-
log.Errorf("Got events out of order from stream (seq = %d, prev = %d)", evt.Seq, lastSeq)
314
+
log.Error("Got events out of order from stream", "seq", evt.Seq, "prev", lastSeq)
307
315
}
308
316
309
317
lastSeq = evt.Seq
+2
-2
events/dbpersist.go
···
131
131
132
132
if needsFlush {
133
133
if err := p.Flush(context.Background()); err != nil {
134
-
log.Errorf("failed to flush batch: %s", err)
134
+
log.Error("failed to flush batch", "err", err)
135
135
}
136
136
}
137
137
}
···
323
323
func (p *DbPersistence) RecordFromRepoCommit(ctx context.Context, evt *comatproto.SyncSubscribeRepos_Commit) (*RepoEventRecord, error) {
324
324
// TODO: hack hack hack
325
325
if len(evt.Ops) > 8192 {
326
-
log.Errorf("(VERY BAD) truncating ops field in outgoing event (len = %d)", len(evt.Ops))
326
+
log.Error("(VERY BAD) truncating ops field in outgoing event", "len", len(evt.Ops))
327
327
evt.Ops = evt.Ops[:8192]
328
328
}
329
329
+13
-19
events/dbpersist_test.go
···
1
-
package events_test
1
+
package events
2
2
3
3
import (
4
4
"context"
···
11
11
atproto "github.com/bluesky-social/indigo/api/atproto"
12
12
"github.com/bluesky-social/indigo/api/bsky"
13
13
"github.com/bluesky-social/indigo/carstore"
14
-
"github.com/bluesky-social/indigo/events"
15
14
lexutil "github.com/bluesky-social/indigo/lex/util"
16
15
"github.com/bluesky-social/indigo/models"
17
-
"github.com/bluesky-social/indigo/pds"
16
+
pds "github.com/bluesky-social/indigo/pds/data"
18
17
"github.com/bluesky-social/indigo/repomgr"
19
18
"github.com/bluesky-social/indigo/util"
20
-
"github.com/ipfs/go-log/v2"
21
19
"gorm.io/driver/sqlite"
22
20
"gorm.io/gorm"
23
21
)
24
22
25
-
func init() {
26
-
log.SetAllLoggers(log.LevelDebug)
27
-
}
28
-
29
23
func BenchmarkDBPersist(b *testing.B) {
30
24
ctx := context.Background()
31
25
···
61
55
defer os.RemoveAll(tempPath)
62
56
63
57
// Initialize a DBPersister
64
-
dbp, err := events.NewDbPersistence(db, cs, nil)
58
+
dbp, err := NewDbPersistence(db, cs, nil)
65
59
if err != nil {
66
60
b.Fatal(err)
67
61
}
68
62
69
63
// Create a bunch of events
70
-
evtman := events.NewEventManager(dbp)
64
+
evtman := NewEventManager(dbp)
71
65
72
66
userRepoHead, err := mgr.GetRepoRoot(ctx, 1)
73
67
if err != nil {
74
68
b.Fatal(err)
75
69
}
76
70
77
-
inEvts := make([]*events.XRPCStreamEvent, b.N)
71
+
inEvts := make([]*XRPCStreamEvent, b.N)
78
72
for i := 0; i < b.N; i++ {
79
73
cidLink := lexutil.LexLink(cid)
80
74
headLink := lexutil.LexLink(userRepoHead)
81
-
inEvts[i] = &events.XRPCStreamEvent{
75
+
inEvts[i] = &XRPCStreamEvent{
82
76
RepoCommit: &atproto.SyncSubscribeRepos_Commit{
83
77
Repo: "did:example:123",
84
78
Commit: headLink,
···
136
130
137
131
b.StopTimer()
138
132
139
-
dbp.Playback(ctx, 0, func(evt *events.XRPCStreamEvent) error {
133
+
dbp.Playback(ctx, 0, func(evt *XRPCStreamEvent) error {
140
134
outEvtCount++
141
135
return nil
142
136
})
···
183
177
defer os.RemoveAll(tempPath)
184
178
185
179
// Initialize a DBPersister
186
-
dbp, err := events.NewDbPersistence(db, cs, nil)
180
+
dbp, err := NewDbPersistence(db, cs, nil)
187
181
if err != nil {
188
182
b.Fatal(err)
189
183
}
190
184
191
185
// Create a bunch of events
192
-
evtman := events.NewEventManager(dbp)
186
+
evtman := NewEventManager(dbp)
193
187
194
188
userRepoHead, err := mgr.GetRepoRoot(ctx, 1)
195
189
if err != nil {
196
190
b.Fatal(err)
197
191
}
198
192
199
-
inEvts := make([]*events.XRPCStreamEvent, n)
193
+
inEvts := make([]*XRPCStreamEvent, n)
200
194
for i := 0; i < n; i++ {
201
195
cidLink := lexutil.LexLink(cid)
202
196
headLink := lexutil.LexLink(userRepoHead)
203
-
inEvts[i] = &events.XRPCStreamEvent{
197
+
inEvts[i] = &XRPCStreamEvent{
204
198
RepoCommit: &atproto.SyncSubscribeRepos_Commit{
205
199
Repo: "did:example:123",
206
200
Commit: headLink,
···
256
250
257
251
b.ResetTimer()
258
252
259
-
dbp.Playback(ctx, 0, func(evt *events.XRPCStreamEvent) error {
253
+
dbp.Playback(ctx, 0, func(evt *XRPCStreamEvent) error {
260
254
outEvtCount++
261
255
return nil
262
256
})
···
301
295
return nil, nil, nil, "", err
302
296
}
303
297
304
-
cs, err := carstore.NewCarStore(cardb, cspath)
298
+
cs, err := carstore.NewCarStore(cardb, []string{cspath})
305
299
if err != nil {
306
300
return nil, nil, nil, "", err
307
301
}
+5
-5
events/diskpersist.go
···
312
312
dp.lk.Lock()
313
313
if err := dp.flushLog(ctx); err != nil {
314
314
// TODO: this happening is quite bad. Need a recovery strategy
315
-
log.Errorf("failed to flush disk log: %s", err)
315
+
log.Error("failed to flush disk log", "err", err)
316
316
}
317
317
dp.lk.Unlock()
318
318
}
···
354
354
case <-t.C:
355
355
if errs := dp.garbageCollect(ctx); len(errs) > 0 {
356
356
for _, err := range errs {
357
-
log.Errorf("garbage collection error: %s", err)
357
+
log.Error("garbage collection error", "err", err)
358
358
}
359
359
}
360
360
}
···
430
430
refsGarbageCollected.WithLabelValues().Add(float64(refsDeleted))
431
431
filesGarbageCollected.WithLabelValues().Add(float64(filesDeleted))
432
432
433
-
log.Infow("garbage collection complete",
433
+
log.Info("garbage collection complete",
434
434
"filesDeleted", filesDeleted,
435
435
"refsDeleted", refsDeleted,
436
436
"oldRefsFound", oldRefsFound,
···
696
696
return nil, err
697
697
}
698
698
if since > lastSeq {
699
-
log.Errorw("playback cursor is greater than last seq of file checked",
699
+
log.Error("playback cursor is greater than last seq of file checked",
700
700
"since", since,
701
701
"lastSeq", lastSeq,
702
702
"filename", fn,
···
778
778
return nil, err
779
779
}
780
780
default:
781
-
log.Warnw("unrecognized event kind coming from log file", "seq", h.Seq, "kind", h.Kind)
781
+
log.Warn("unrecognized event kind coming from log file", "seq", h.Seq, "kind", h.Kind)
782
782
return nil, fmt.Errorf("halting on unrecognized event kind")
783
783
}
784
784
}
+41
-36
events/diskpersist_test.go
···
1
-
package events_test
1
+
package events
2
2
3
3
import (
4
4
"context"
···
14
14
atproto "github.com/bluesky-social/indigo/api/atproto"
15
15
"github.com/bluesky-social/indigo/api/bsky"
16
16
"github.com/bluesky-social/indigo/carstore"
17
-
"github.com/bluesky-social/indigo/events"
18
17
lexutil "github.com/bluesky-social/indigo/lex/util"
19
18
"github.com/bluesky-social/indigo/models"
20
-
"github.com/bluesky-social/indigo/pds"
19
+
pds "github.com/bluesky-social/indigo/pds/data"
21
20
"github.com/bluesky-social/indigo/repomgr"
22
21
"github.com/bluesky-social/indigo/util"
23
22
"gorm.io/gorm"
24
23
)
25
24
26
-
func TestDiskPersist(t *testing.T) {
25
+
func testPersister(t *testing.T, persistenceFactory func(path string, db *gorm.DB) (EventPersistence, error)) {
27
26
ctx := context.Background()
28
27
29
28
db, _, cs, tempPath, err := setupDBs(t)
···
57
56
58
57
defer os.RemoveAll(tempPath)
59
58
60
-
// Initialize a DBPersister
61
-
62
-
dp, err := events.NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &events.DiskPersistOptions{
63
-
EventsPerFile: 10,
64
-
UIDCacheSize: 100000,
65
-
DIDCacheSize: 100000,
66
-
})
59
+
// Initialize a persister
60
+
dp, err := persistenceFactory(tempPath, db)
67
61
if err != nil {
68
62
t.Fatal(err)
69
63
}
70
64
71
65
// Create a bunch of events
72
-
evtman := events.NewEventManager(dp)
66
+
evtman := NewEventManager(dp)
73
67
74
68
userRepoHead, err := mgr.GetRepoRoot(ctx, 1)
75
69
if err != nil {
···
77
71
}
78
72
79
73
n := 100
80
-
inEvts := make([]*events.XRPCStreamEvent, n)
74
+
inEvts := make([]*XRPCStreamEvent, n)
81
75
for i := 0; i < n; i++ {
82
76
cidLink := lexutil.LexLink(cid)
83
77
headLink := lexutil.LexLink(userRepoHead)
84
-
inEvts[i] = &events.XRPCStreamEvent{
78
+
inEvts[i] = &XRPCStreamEvent{
85
79
RepoCommit: &atproto.SyncSubscribeRepos_Commit{
86
80
Repo: "did:example:123",
87
81
Commit: headLink,
···
93
87
},
94
88
},
95
89
Time: time.Now().Format(util.ISO8601),
90
+
Seq: int64(i),
96
91
},
97
92
}
98
93
}
···
112
107
outEvtCount := 0
113
108
expectedEvtCount := n
114
109
115
-
dp.Playback(ctx, 0, func(evt *events.XRPCStreamEvent) error {
110
+
dp.Playback(ctx, 0, func(evt *XRPCStreamEvent) error {
116
111
outEvtCount++
117
112
return nil
118
113
})
···
125
120
126
121
time.Sleep(time.Millisecond * 100)
127
122
128
-
dp2, err := events.NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &events.DiskPersistOptions{
123
+
dp2, err := NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &DiskPersistOptions{
129
124
EventsPerFile: 10,
130
125
UIDCacheSize: 100000,
131
126
DIDCacheSize: 100000,
···
134
129
t.Fatal(err)
135
130
}
136
131
137
-
evtman2 := events.NewEventManager(dp2)
132
+
evtman2 := NewEventManager(dp2)
138
133
139
-
inEvts = make([]*events.XRPCStreamEvent, n)
134
+
inEvts = make([]*XRPCStreamEvent, n)
140
135
for i := 0; i < n; i++ {
141
136
cidLink := lexutil.LexLink(cid)
142
137
headLink := lexutil.LexLink(userRepoHead)
143
-
inEvts[i] = &events.XRPCStreamEvent{
138
+
inEvts[i] = &XRPCStreamEvent{
144
139
RepoCommit: &atproto.SyncSubscribeRepos_Commit{
145
140
Repo: "did:example:123",
146
141
Commit: headLink,
···
163
158
}
164
159
}
165
160
}
161
+
func TestDiskPersist(t *testing.T) {
162
+
factory := func(tempPath string, db *gorm.DB) (EventPersistence, error) {
163
+
return NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &DiskPersistOptions{
164
+
EventsPerFile: 10,
165
+
UIDCacheSize: 100000,
166
+
DIDCacheSize: 100000,
167
+
})
168
+
}
169
+
testPersister(t, factory)
170
+
}
166
171
167
172
func BenchmarkDiskPersist(b *testing.B) {
168
173
db, _, cs, tempPath, err := setupDBs(b)
···
174
179
175
180
// Initialize a DBPersister
176
181
177
-
dp, err := events.NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &events.DiskPersistOptions{
182
+
dp, err := NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &DiskPersistOptions{
178
183
EventsPerFile: 5000,
179
184
UIDCacheSize: 100000,
180
185
DIDCacheSize: 100000,
···
187
192
188
193
}
189
194
190
-
func runPersisterBenchmark(b *testing.B, cs carstore.CarStore, db *gorm.DB, p events.EventPersistence) {
195
+
func runPersisterBenchmark(b *testing.B, cs carstore.CarStore, db *gorm.DB, p EventPersistence) {
191
196
ctx := context.Background()
192
197
193
198
db.AutoMigrate(&pds.User{})
···
215
220
}
216
221
217
222
// Create a bunch of events
218
-
evtman := events.NewEventManager(p)
223
+
evtman := NewEventManager(p)
219
224
220
225
userRepoHead, err := mgr.GetRepoRoot(ctx, 1)
221
226
if err != nil {
222
227
b.Fatal(err)
223
228
}
224
229
225
-
inEvts := make([]*events.XRPCStreamEvent, b.N)
230
+
inEvts := make([]*XRPCStreamEvent, b.N)
226
231
for i := 0; i < b.N; i++ {
227
232
cidLink := lexutil.LexLink(cid)
228
233
headLink := lexutil.LexLink(userRepoHead)
229
-
inEvts[i] = &events.XRPCStreamEvent{
234
+
inEvts[i] = &XRPCStreamEvent{
230
235
RepoCommit: &atproto.SyncSubscribeRepos_Commit{
231
236
Repo: "did:example:123",
232
237
Commit: headLink,
···
290
295
291
296
// Initialize a DBPersister
292
297
293
-
dp, err := events.NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &events.DiskPersistOptions{
298
+
dp, err := NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &DiskPersistOptions{
294
299
EventsPerFile: 20,
295
300
UIDCacheSize: 100000,
296
301
DIDCacheSize: 100000,
···
302
307
runEventManagerTest(t, cs, db, dp)
303
308
}
304
309
305
-
func runEventManagerTest(t *testing.T, cs carstore.CarStore, db *gorm.DB, p events.EventPersistence) {
310
+
func runEventManagerTest(t *testing.T, cs carstore.CarStore, db *gorm.DB, p EventPersistence) {
306
311
ctx := context.Background()
307
312
308
313
db.AutoMigrate(&pds.User{})
···
329
334
t.Fatal(err)
330
335
}
331
336
332
-
evtman := events.NewEventManager(p)
337
+
evtman := NewEventManager(p)
333
338
334
339
userRepoHead, err := mgr.GetRepoRoot(ctx, 1)
335
340
if err != nil {
···
337
342
}
338
343
339
344
testSize := 100 // you can adjust this number as needed
340
-
inEvts := make([]*events.XRPCStreamEvent, testSize)
345
+
inEvts := make([]*XRPCStreamEvent, testSize)
341
346
for i := 0; i < testSize; i++ {
342
347
cidLink := lexutil.LexLink(cid)
343
348
headLink := lexutil.LexLink(userRepoHead)
344
-
inEvts[i] = &events.XRPCStreamEvent{
349
+
inEvts[i] = &XRPCStreamEvent{
345
350
RepoCommit: &atproto.SyncSubscribeRepos_Commit{
346
351
Repo: "did:example:123",
347
352
Commit: headLink,
···
368
373
}
369
374
370
375
outEvtCount := 0
371
-
p.Playback(ctx, 0, func(evt *events.XRPCStreamEvent) error {
376
+
p.Playback(ctx, 0, func(evt *XRPCStreamEvent) error {
372
377
// Check that the contents of the output events match the input events
373
378
// Clear cache, don't care if one has it and not the other
374
379
inEvts[outEvtCount].Preserialized = nil
···
397
402
398
403
// Initialize a DBPersister
399
404
400
-
dp, err := events.NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &events.DiskPersistOptions{
405
+
dp, err := NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &DiskPersistOptions{
401
406
EventsPerFile: 10,
402
407
UIDCacheSize: 100000,
403
408
DIDCacheSize: 100000,
···
409
414
runTakedownTest(t, cs, db, dp)
410
415
}
411
416
412
-
func runTakedownTest(t *testing.T, cs carstore.CarStore, db *gorm.DB, p events.EventPersistence) {
417
+
func runTakedownTest(t *testing.T, cs carstore.CarStore, db *gorm.DB, p EventPersistence) {
413
418
ctx := context.TODO()
414
419
415
420
db.AutoMigrate(&pds.User{})
···
439
444
}
440
445
}
441
446
442
-
evtman := events.NewEventManager(p)
447
+
evtman := NewEventManager(p)
443
448
444
449
testSize := 100 // you can adjust this number as needed
445
-
inEvts := make([]*events.XRPCStreamEvent, testSize*userCount)
450
+
inEvts := make([]*XRPCStreamEvent, testSize*userCount)
446
451
for i := 0; i < testSize*userCount; i++ {
447
452
user := users[i%userCount]
448
453
_, cid, err := mgr.CreateRecord(ctx, user.Uid, "app.bsky.feed.post", &bsky.FeedPost{
···
460
465
461
466
cidLink := lexutil.LexLink(cid)
462
467
headLink := lexutil.LexLink(userRepoHead)
463
-
inEvts[i] = &events.XRPCStreamEvent{
468
+
inEvts[i] = &XRPCStreamEvent{
464
469
RepoCommit: &atproto.SyncSubscribeRepos_Commit{
465
470
Repo: user.Did,
466
471
Commit: headLink,
···
495
500
496
501
// Verify that the events of the user have been removed from the event stream
497
502
var evtsCount int
498
-
if err := p.Playback(ctx, 0, func(evt *events.XRPCStreamEvent) error {
503
+
if err := p.Playback(ctx, 0, func(evt *XRPCStreamEvent) error {
499
504
evtsCount++
500
505
if evt.RepoCommit.Repo == takeDownUser.Did {
501
506
t.Fatalf("found event for user %d after takedown", takeDownUser.Uid)
+94
-13
events/events.go
···
6
6
"errors"
7
7
"fmt"
8
8
"io"
9
+
"log/slog"
9
10
"sync"
10
11
"time"
11
12
···
14
15
"github.com/bluesky-social/indigo/models"
15
16
"github.com/prometheus/client_golang/prometheus"
16
17
17
-
logging "github.com/ipfs/go-log"
18
18
cbg "github.com/whyrusleeping/cbor-gen"
19
19
"go.opentelemetry.io/otel"
20
20
)
21
21
22
-
var log = logging.Logger("events")
22
+
var log = slog.Default().With("system", "events")
23
23
24
24
type Scheduler interface {
25
25
AddWork(ctx context.Context, repo string, val *XRPCStreamEvent) error
···
34
34
crossoverBufferSize int
35
35
36
36
persister EventPersistence
37
+
38
+
log *slog.Logger
37
39
}
38
40
39
41
func NewEventManager(persister EventPersistence) *EventManager {
···
41
43
bufferSize: 16 << 10,
42
44
crossoverBufferSize: 512,
43
45
persister: persister,
46
+
log: slog.Default().With("system", "events"),
44
47
}
45
48
46
49
persister.SetEventBroadcaster(em.broadcastEvent)
···
67
70
func (em *EventManager) broadcastEvent(evt *XRPCStreamEvent) {
68
71
// the main thing we do is send it out, so MarshalCBOR once
69
72
if err := evt.Preserialize(); err != nil {
70
-
log.Errorf("broadcast serialize failed, %s", err)
73
+
em.log.Error("broadcast serialize failed", "err", err)
71
74
// serialize isn't going to go better later, this event is cursed
72
75
return
73
76
}
···
93
96
// code
94
97
s.filter = func(*XRPCStreamEvent) bool { return false }
95
98
96
-
log.Warnw("dropping slow consumer due to event overflow", "bufferSize", len(s.outgoing), "ident", s.ident)
99
+
em.log.Warn("dropping slow consumer due to event overflow", "bufferSize", len(s.outgoing), "ident", s.ident)
97
100
go func(torem *Subscriber) {
98
101
torem.lk.Lock()
99
102
if !torem.cleanedUp {
···
104
107
},
105
108
}:
106
109
case <-time.After(time.Second * 5):
107
-
log.Warnw("failed to send error frame to backed up consumer", "ident", torem.ident)
110
+
em.log.Warn("failed to send error frame to backed up consumer", "ident", torem.ident)
108
111
}
109
112
}
110
113
torem.lk.Unlock()
···
121
124
// accept a uid. The lookup inside the persister is notably expensive (despite
122
125
// being an lru cache?)
123
126
if err := em.persister.Persist(ctx, evt); err != nil {
124
-
log.Errorf("failed to persist outbound event: %s", err)
127
+
em.log.Error("failed to persist outbound event", "err", err)
125
128
}
126
129
}
127
130
···
219
222
return obj.MarshalCBOR(cborWriter)
220
223
}
221
224
225
+
func (xevt *XRPCStreamEvent) Deserialize(r io.Reader) error {
226
+
var header EventHeader
227
+
if err := header.UnmarshalCBOR(r); err != nil {
228
+
return fmt.Errorf("reading header: %w", err)
229
+
}
230
+
switch header.Op {
231
+
case EvtKindMessage:
232
+
switch header.MsgType {
233
+
case "#commit":
234
+
var evt comatproto.SyncSubscribeRepos_Commit
235
+
if err := evt.UnmarshalCBOR(r); err != nil {
236
+
return fmt.Errorf("reading repoCommit event: %w", err)
237
+
}
238
+
xevt.RepoCommit = &evt
239
+
case "#handle":
240
+
var evt comatproto.SyncSubscribeRepos_Handle
241
+
if err := evt.UnmarshalCBOR(r); err != nil {
242
+
return err
243
+
}
244
+
xevt.RepoHandle = &evt
245
+
case "#identity":
246
+
var evt comatproto.SyncSubscribeRepos_Identity
247
+
if err := evt.UnmarshalCBOR(r); err != nil {
248
+
return err
249
+
}
250
+
xevt.RepoIdentity = &evt
251
+
case "#account":
252
+
var evt comatproto.SyncSubscribeRepos_Account
253
+
if err := evt.UnmarshalCBOR(r); err != nil {
254
+
return err
255
+
}
256
+
xevt.RepoAccount = &evt
257
+
case "#info":
258
+
// TODO: this might also be a LabelInfo (as opposed to RepoInfo)
259
+
var evt comatproto.SyncSubscribeRepos_Info
260
+
if err := evt.UnmarshalCBOR(r); err != nil {
261
+
return err
262
+
}
263
+
xevt.RepoInfo = &evt
264
+
case "#migrate":
265
+
var evt comatproto.SyncSubscribeRepos_Migrate
266
+
if err := evt.UnmarshalCBOR(r); err != nil {
267
+
return err
268
+
}
269
+
xevt.RepoMigrate = &evt
270
+
case "#tombstone":
271
+
var evt comatproto.SyncSubscribeRepos_Tombstone
272
+
if err := evt.UnmarshalCBOR(r); err != nil {
273
+
return err
274
+
}
275
+
xevt.RepoTombstone = &evt
276
+
case "#labels":
277
+
var evt comatproto.LabelSubscribeLabels_Labels
278
+
if err := evt.UnmarshalCBOR(r); err != nil {
279
+
return fmt.Errorf("reading Labels event: %w", err)
280
+
}
281
+
xevt.LabelLabels = &evt
282
+
}
283
+
case EvtKindErrorFrame:
284
+
var errframe ErrorFrame
285
+
if err := errframe.UnmarshalCBOR(r); err != nil {
286
+
return err
287
+
}
288
+
xevt.Error = &errframe
289
+
default:
290
+
return fmt.Errorf("unrecognized event stream type: %d", header.Op)
291
+
}
292
+
return nil
293
+
}
294
+
295
+
var ErrNoSeq = errors.New("event has no sequence number")
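Deserialize is the inverse of the existing Serialize/Preserialize path (header frame followed by the typed CBOR body), which is what lets the pebble persister store `Preserialized` bytes and rebuild events on playback. A minimal same-package round-trip sketch; the `roundTrip` helper and the bare commit (only `Repo` and `Seq` set) are illustrative:

```go
// roundTrip is a sketch only; it assumes the package's comatproto alias and a bytes import.
func roundTrip() (*XRPCStreamEvent, error) {
	orig := &XRPCStreamEvent{
		RepoCommit: &comatproto.SyncSubscribeRepos_Commit{Repo: "did:example:123", Seq: 42},
	}
	if err := orig.Preserialize(); err != nil {
		return nil, err
	}

	var decoded XRPCStreamEvent
	if err := decoded.Deserialize(bytes.NewReader(orig.Preserialized)); err != nil {
		return nil, err
	}
	// decoded.RepoCommit.Seq == 42; decoded.Preserialized stays nil here, and
	// pebblepersist re-attaches the raw bytes itself after decoding.
	return &decoded, nil
}
```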
296
+
222
297
// serialize content into Preserialized cache
223
298
func (evt *XRPCStreamEvent) Preserialize() error {
224
299
if evt.Preserialized != nil {
···
290
365
case <-done:
291
366
return ErrPlaybackShutdown
292
367
case out <- e:
293
-
seq := sequenceForEvent(e)
368
+
seq := SequenceForEvent(e)
294
369
if seq > 0 {
295
370
lastSeq = seq
296
371
}
···
298
373
}
299
374
}); err != nil {
300
375
if errors.Is(err, ErrPlaybackShutdown) {
301
-
log.Warnf("events playback: %s", err)
376
+
em.log.Warn("events playback", "err", err)
302
377
} else {
303
-
log.Errorf("events playback: %s", err)
378
+
em.log.Error("events playback", "err", err)
304
379
}
305
380
306
381
// TODO: send an error frame or something?
···
315
390
316
391
// run playback again to get us to the events that have started buffering
317
392
if err := em.persister.Playback(ctx, lastSeq, func(e *XRPCStreamEvent) error {
318
-
seq := sequenceForEvent(e)
319
-
if seq > sequenceForEvent(first) {
393
+
seq := SequenceForEvent(e)
394
+
if seq > SequenceForEvent(first) {
320
395
return ErrCaughtUp
321
396
}
322
397
···
328
403
}
329
404
}); err != nil {
330
405
if !errors.Is(err, ErrCaughtUp) {
331
-
log.Errorf("events playback: %s", err)
406
+
em.log.Error("events playback", "err", err)
332
407
333
408
// TODO: send an error frame or something?
334
409
close(out)
···
351
426
return out, sub.cleanup, nil
352
427
}
353
428
354
-
func sequenceForEvent(evt *XRPCStreamEvent) int64 {
429
+
func SequenceForEvent(evt *XRPCStreamEvent) int64 {
430
+
return evt.Sequence()
431
+
}
432
+
433
+
func (evt *XRPCStreamEvent) Sequence() int64 {
355
434
switch {
356
435
case evt == nil:
357
436
return -1
···
365
444
return evt.RepoTombstone.Seq
366
445
case evt.RepoIdentity != nil:
367
446
return evt.RepoIdentity.Seq
447
+
case evt.RepoAccount != nil:
448
+
return evt.RepoAccount.Seq
368
449
case evt.RepoInfo != nil:
369
450
return -1
370
451
case evt.Error != nil:
+262
events/pebblepersist.go
···
1
+
package events
2
+
3
+
import (
4
+
"bytes"
5
+
"context"
6
+
"encoding/binary"
7
+
"encoding/hex"
8
+
"errors"
9
+
"fmt"
10
+
"time"
11
+
12
+
"github.com/bluesky-social/indigo/models"
13
+
"github.com/cockroachdb/pebble"
14
+
)
15
+
16
+
type PebblePersist struct {
17
+
broadcast func(*XRPCStreamEvent)
18
+
db *pebble.DB
19
+
20
+
prevSeq int64
21
+
prevSeqExtra uint32
22
+
23
+
cancel func()
24
+
25
+
options PebblePersistOptions
26
+
}
27
+
28
+
type PebblePersistOptions struct {
29
+
// path where pebble will create a directory full of files
30
+
DbPath string
31
+
32
+
// Throw away posts older than some time ago
33
+
PersistDuration time.Duration
34
+
35
+
// Throw away old posts every so often
36
+
GCPeriod time.Duration
37
+
38
+
// MaxBytes is what we _try_ to keep disk usage under
39
+
MaxBytes uint64
40
+
}
41
+
42
+
var DefaultPebblePersistOptions = PebblePersistOptions{
43
+
PersistDuration: time.Minute * 20,
44
+
GCPeriod: time.Minute * 5,
45
+
MaxBytes: 1024 * 1024 * 1024, // 1 GiB
46
+
}
47
+
48
+
// Create a new EventPersistence which stores data in pebbledb
49
+
// nil opts is ok
50
+
func NewPebblePersistance(opts *PebblePersistOptions) (*PebblePersist, error) {
51
+
if opts == nil {
52
+
opts = &DefaultPebblePersistOptions
53
+
}
54
+
db, err := pebble.Open(opts.DbPath, &pebble.Options{})
55
+
if err != nil {
56
+
return nil, fmt.Errorf("%s: %w", opts.DbPath, err)
57
+
}
58
+
pp := new(PebblePersist)
59
+
pp.options = *opts
60
+
pp.db = db
61
+
return pp, nil
62
+
}
63
+
64
+
func setKeySeqMillis(key []byte, seq, millis int64) {
65
+
binary.BigEndian.PutUint64(key[:8], uint64(seq))
66
+
binary.BigEndian.PutUint64(key[8:16], uint64(millis))
67
+
}
68
+
69
+
func (pp *PebblePersist) Persist(ctx context.Context, e *XRPCStreamEvent) error {
70
+
err := e.Preserialize()
71
+
if err != nil {
72
+
return err
73
+
}
74
+
blob := e.Preserialized
75
+
76
+
seq := e.Sequence()
77
+
nowMillis := time.Now().UnixMilli()
78
+
79
+
if seq < 0 {
80
+
// persist with longer key {prev 8 byte key}{time}{int32 extra counter}
81
+
pp.prevSeqExtra++
82
+
var key [20]byte
83
+
setKeySeqMillis(key[:], seq, nowMillis)
84
+
binary.BigEndian.PutUint32(key[16:], pp.prevSeqExtra)
85
+
86
+
err = pp.db.Set(key[:], blob, pebble.Sync)
87
+
} else {
88
+
pp.prevSeq = seq
89
+
pp.prevSeqExtra = 0
90
+
var key [16]byte
91
+
setKeySeqMillis(key[:], seq, nowMillis)
92
+
93
+
err = pp.db.Set(key[:], blob, pebble.Sync)
94
+
}
95
+
96
+
if err != nil {
97
+
return err
98
+
}
99
+
pp.broadcast(e)
100
+
101
+
return err
102
+
}
103
+
104
+
func eventFromPebbleIter(iter *pebble.Iterator) (*XRPCStreamEvent, error) {
105
+
blob, err := iter.ValueAndErr()
106
+
if err != nil {
107
+
return nil, err
108
+
}
109
+
br := bytes.NewReader(blob)
110
+
evt := new(XRPCStreamEvent)
111
+
err = evt.Deserialize(br)
112
+
if err != nil {
113
+
return nil, err
114
+
}
115
+
evt.Preserialized = bytes.Clone(blob)
116
+
return evt, nil
117
+
}
118
+
119
+
func (pp *PebblePersist) Playback(ctx context.Context, since int64, cb func(*XRPCStreamEvent) error) error {
120
+
var key [8]byte
121
+
binary.BigEndian.PutUint64(key[:], uint64(since))
122
+
123
+
iter, err := pp.db.NewIterWithContext(ctx, &pebble.IterOptions{LowerBound: key[:]})
124
+
if err != nil {
125
+
return err
126
+
}
127
+
defer iter.Close()
128
+
129
+
for iter.First(); iter.Valid(); iter.Next() {
130
+
evt, err := eventFromPebbleIter(iter)
131
+
if err != nil {
132
+
return err
133
+
}
134
+
135
+
err = cb(evt)
136
+
if err != nil {
137
+
return err
138
+
}
139
+
}
140
+
141
+
return nil
142
+
}
143
+
func (pp *PebblePersist) TakeDownRepo(ctx context.Context, usr models.Uid) error {
144
+
// TODO: implement filter on playback to ignore taken-down-repos?
145
+
return nil
146
+
}
147
+
func (pp *PebblePersist) Flush(context.Context) error {
148
+
return pp.db.Flush()
149
+
}
150
+
func (pp *PebblePersist) Shutdown(context.Context) error {
151
+
if pp.cancel != nil {
152
+
pp.cancel()
153
+
}
154
+
err := pp.db.Close()
155
+
pp.db = nil
156
+
return err
157
+
}
158
+
159
+
func (pp *PebblePersist) SetEventBroadcaster(broadcast func(*XRPCStreamEvent)) {
160
+
pp.broadcast = broadcast
161
+
}
162
+
163
+
var ErrNoLast = errors.New("no last event")
164
+
165
+
func (pp *PebblePersist) GetLast(ctx context.Context) (seq, millis int64, evt *XRPCStreamEvent, err error) {
166
+
iter, err := pp.db.NewIterWithContext(ctx, &pebble.IterOptions{})
167
+
if err != nil {
168
+
return 0, 0, nil, err
169
+
}
170
+
ok := iter.Last()
171
+
if !ok {
172
+
return 0, 0, nil, ErrNoLast
173
+
}
174
+
evt, err = eventFromPebbleIter(iter)
175
+
keyblob := iter.Key()
176
+
seq = int64(binary.BigEndian.Uint64(keyblob[:8]))
177
+
millis = int64(binary.BigEndian.Uint64(keyblob[8:16]))
178
+
return seq, millis, evt, nil
179
+
}
180
+
181
+
// example;
182
+
// ```
183
+
// pp, err := NewPebblePersistance(&PebblePersistOptions{DbPath: "/tmp/foo.pebble", PersistDuration: 48 * time.Hour, GCPeriod: 5 * time.Minute})
184
+
// go pp.GCThread(context.Background())
185
+
// ```
186
+
func (pp *PebblePersist) GCThread(ctx context.Context) {
187
+
ctx, cancel := context.WithCancel(ctx)
188
+
pp.cancel = cancel
189
+
ticker := time.NewTicker(pp.options.GCPeriod)
190
+
defer ticker.Stop()
191
+
for {
192
+
select {
193
+
case <-ticker.C:
194
+
err := pp.GarbageCollect(ctx)
195
+
if err != nil {
196
+
log.Error("GC err", "err", err)
197
+
}
198
+
case <-ctx.Done():
199
+
return
200
+
}
201
+
}
202
+
}
203
+
204
+
var zeroKey [16]byte
205
+
var ffffKey [16]byte
206
+
207
+
func init() {
208
+
setKeySeqMillis(zeroKey[:], 0, 0)
209
+
for i := range ffffKey {
210
+
ffffKey[i] = 0xff
211
+
}
212
+
}
213
+
214
+
func (pp *PebblePersist) GarbageCollect(ctx context.Context) error {
215
+
nowMillis := time.Now().UnixMilli()
216
+
expired := nowMillis - pp.options.PersistDuration.Milliseconds()
217
+
iter, err := pp.db.NewIterWithContext(ctx, &pebble.IterOptions{})
218
+
if err != nil {
219
+
return err
220
+
}
221
+
defer iter.Close()
222
+
// scan keys to find last expired, then delete range
223
+
var seq int64 = int64(-1)
224
+
var lastKeyTime int64
225
+
for iter.First(); iter.Valid(); iter.Next() {
226
+
keyblob := iter.Key()
227
+
228
+
keyTime := int64(binary.BigEndian.Uint64(keyblob[8:16]))
229
+
if keyTime <= expired {
230
+
lastKeyTime = keyTime
231
+
seq = int64(binary.BigEndian.Uint64(keyblob[:8]))
232
+
} else {
233
+
break
234
+
}
235
+
}
236
+
237
+
// TODO: use pp.options.MaxBytes
238
+
239
+
sizeBefore, _ := pp.db.EstimateDiskUsage(zeroKey[:], ffffKey[:])
240
+
if seq == -1 {
241
+
// nothing to delete
242
+
log.Info("pebble gc nop", "size", sizeBefore)
243
+
return nil
244
+
}
245
+
var key [16]byte
246
+
setKeySeqMillis(key[:], seq, lastKeyTime)
247
+
log.Info("pebble gc start", "to", hex.EncodeToString(key[:]))
248
+
err = pp.db.DeleteRange(zeroKey[:], key[:], pebble.Sync)
249
+
if err != nil {
250
+
return err
251
+
}
252
+
sizeAfter, _ := pp.db.EstimateDiskUsage(zeroKey[:], ffffKey[:])
253
+
log.Info("pebble gc", "before", sizeBefore, "after", sizeAfter)
254
+
start := time.Now()
255
+
err = pp.db.Compact(zeroKey[:], key[:], true)
256
+
if err != nil {
257
+
log.Warn("pebble gc compact", "err", err)
258
+
}
259
+
dt := time.Since(start)
260
+
log.Info("pebble gc compact ok", "dt", dt)
261
+
return nil
262
+
}
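A note on the key layout used throughout this file: the first 8 bytes are the big-endian sequence number (so keys iterate in sequence order), the next 8 bytes are unix milliseconds (so GC can find the expiry point), and events without a sequence get 4 extra counter bytes. A small sketch of the inverse of `setKeySeqMillis`; the helper name is hypothetical:

```go
// decodeKeySeqMillis undoes setKeySeqMillis for a 16-byte key.
func decodeKeySeqMillis(key []byte) (seq, millis int64) {
	seq = int64(binary.BigEndian.Uint64(key[:8]))
	millis = int64(binary.BigEndian.Uint64(key[8:16]))
	return seq, millis
}
```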
+16
events/pebblepersist_test.go
···
1
+
package events
2
+
3
+
import (
4
+
"gorm.io/gorm"
5
+
"path/filepath"
6
+
"testing"
7
+
)
8
+
9
+
func TestPebblePersist(t *testing.T) {
10
+
factory := func(tempPath string, db *gorm.DB) (EventPersistence, error) {
11
+
opts := DefaultPebblePersistOptions
12
+
opts.DbPath = filepath.Join(tempPath, "pebble.db")
13
+
return NewPebblePersistance(&opts)
14
+
}
15
+
testPersister(t, factory)
16
+
}
+14
-12
events/schedulers/autoscaling/autoscaling.go
···
2
2
3
3
import (
4
4
"context"
5
+
"log/slog"
5
6
"sync"
6
7
"time"
7
8
8
9
"github.com/bluesky-social/indigo/events"
9
10
"github.com/bluesky-social/indigo/events/schedulers"
10
-
logging "github.com/ipfs/go-log"
11
11
"github.com/prometheus/client_golang/prometheus"
12
12
)
13
-
14
-
var log = logging.Logger("autoscaling-scheduler")
15
13
16
14
// Scheduler is a scheduler that will scale up and down the number of workers based on the throughput of the workers.
17
15
type Scheduler struct {
···
40
38
autoscaleFrequency time.Duration
41
39
autoscalerIn chan struct{}
42
40
autoscalerOut chan struct{}
41
+
42
+
log *slog.Logger
43
43
}
44
44
45
45
type AutoscaleSettings struct {
···
99
99
autoscaleFrequency: autoscaleSettings.AutoscaleFrequency,
100
100
autoscalerIn: make(chan struct{}),
101
101
autoscalerOut: make(chan struct{}),
102
+
103
+
log: slog.Default().With("system", "autoscaling-scheduler"),
102
104
}
103
105
104
106
for i := 0; i < p.concurrency; i++ {
···
111
113
}
112
114
113
115
func (p *Scheduler) Shutdown() {
114
-
log.Debugf("shutting down autoscaling scheduler for %s", p.ident)
116
+
p.log.Debug("shutting down autoscaling scheduler", "ident", p.ident)
115
117
116
118
// stop autoscaling
117
119
p.autoscalerIn <- struct{}{}
118
120
close(p.autoscalerIn)
119
121
<-p.autoscalerOut
120
122
121
-
log.Debug("stopping autoscaling scheduler workers")
123
+
p.log.Debug("stopping autoscaling scheduler workers")
122
124
// stop workers
123
125
for i := 0; i < p.concurrency; i++ {
124
126
p.feeder <- &consumerTask{signal: "stop"}
125
127
}
126
128
close(p.feeder)
127
129
128
-
log.Debug("waiting for autoscaling scheduler workers to stop")
130
+
p.log.Debug("waiting for autoscaling scheduler workers to stop")
129
131
130
132
p.workerGroup.Wait()
131
133
132
-
log.Debug("stopping autoscaling scheduler throughput manager")
134
+
p.log.Debug("stopping autoscaling scheduler throughput manager")
133
135
p.throughputManager.Stop()
134
136
135
-
log.Debug("autoscaling scheduler shutdown complete")
137
+
p.log.Debug("autoscaling scheduler shutdown complete")
136
138
}
137
139
138
140
// Add autoscaling function
···
197
199
}
198
200
199
201
func (p *Scheduler) worker() {
200
-
log.Debugf("starting autoscaling worker for %s", p.ident)
202
+
p.log.Debug("starting autoscaling worker", "ident", p.ident)
201
203
p.workersActive.Inc()
202
204
p.workerGroup.Add(1)
203
205
defer p.workerGroup.Done()
···
205
207
for work != nil {
206
208
// Check if the work item contains a signal to stop the worker.
207
209
if work.signal == "stop" {
208
-
log.Debugf("stopping autoscaling worker for %s", p.ident)
210
+
p.log.Debug("stopping autoscaling worker", "ident", p.ident)
209
211
p.workersActive.Dec()
210
212
return
211
213
}
212
214
213
215
p.itemsActive.Inc()
214
216
if err := p.do(context.TODO(), work.val); err != nil {
215
-
log.Errorf("event handler failed: %s", err)
217
+
p.log.Error("event handler failed", "err", err)
216
218
}
217
219
p.itemsProcessed.Inc()
218
220
219
221
p.lk.Lock()
220
222
rem, ok := p.active[work.repo]
221
223
if !ok {
222
-
log.Errorf("should always have an 'active' entry if a worker is processing a job")
224
+
p.log.Error("should always have an 'active' entry if a worker is processing a job")
223
225
}
224
226
225
227
if len(rem) == 0 {
+9
-7
events/schedulers/parallel/parallel.go
···
2
2
3
3
import (
4
4
"context"
5
+
"log/slog"
5
6
"sync"
6
7
7
8
"github.com/bluesky-social/indigo/events"
8
9
"github.com/bluesky-social/indigo/events/schedulers"
9
-
logging "github.com/ipfs/go-log"
10
10
11
11
"github.com/prometheus/client_golang/prometheus"
12
12
)
13
-
14
-
var log = logging.Logger("parallel-scheduler")
15
13
16
14
// Scheduler is a parallel scheduler that will run work on a fixed number of workers
17
15
type Scheduler struct {
···
33
31
itemsProcessed prometheus.Counter
34
32
itemsActive prometheus.Counter
35
33
workesActive prometheus.Gauge
34
+
35
+
log *slog.Logger
36
36
}
37
37
38
38
func NewScheduler(maxC, maxQ int, ident string, do func(context.Context, *events.XRPCStreamEvent) error) *Scheduler {
···
52
52
itemsProcessed: schedulers.WorkItemsProcessed.WithLabelValues(ident, "parallel"),
53
53
itemsActive: schedulers.WorkItemsActive.WithLabelValues(ident, "parallel"),
54
54
workesActive: schedulers.WorkersActive.WithLabelValues(ident, "parallel"),
55
+
56
+
log: slog.Default().With("system", "parallel-scheduler"),
55
57
}
56
58
57
59
for i := 0; i < maxC; i++ {
···
64
66
}
65
67
66
68
func (p *Scheduler) Shutdown() {
67
-
log.Infof("shutting down parallel scheduler for %s", p.ident)
69
+
p.log.Info("shutting down parallel scheduler", "ident", p.ident)
68
70
69
71
for i := 0; i < p.maxConcurrency; i++ {
70
72
p.feeder <- &consumerTask{
···
78
80
<-p.out
79
81
}
80
82
81
-
log.Info("parallel scheduler shutdown complete")
83
+
p.log.Info("parallel scheduler shutdown complete")
82
84
}
83
85
84
86
type consumerTask struct {
···
123
125
124
126
p.itemsActive.Inc()
125
127
if err := p.do(context.TODO(), work.val); err != nil {
126
-
log.Errorf("event handler failed: %s", err)
128
+
p.log.Error("event handler failed", "err", err)
127
129
}
128
130
p.itemsProcessed.Inc()
129
131
130
132
p.lk.Lock()
131
133
rem, ok := p.active[work.repo]
132
134
if !ok {
133
-
log.Errorf("should always have an 'active' entry if a worker is processing a job")
135
+
p.log.Error("should always have an 'active' entry if a worker is processing a job")
134
136
}
135
137
136
138
if len(rem) == 0 {
+1
-3
events/schedulers/sequential/sequential.go
···
2
2
3
3
import (
4
4
"context"
5
-
6
5
"github.com/bluesky-social/indigo/events"
7
6
"github.com/bluesky-social/indigo/events/schedulers"
8
-
logging "github.com/ipfs/go-log"
9
7
"github.com/prometheus/client_golang/prometheus"
10
8
)
11
9
12
-
var log = logging.Logger("sequential-scheduler")
10
+
// var log = slog.Default().With("system", "sequential-scheduler")
13
11
14
12
// Scheduler is a sequential scheduler that will run work on a single worker
15
13
type Scheduler struct {
+3
-7
fakedata/accounts.go
···
20
20
21
21
func (ac *AccountCatalog) Combined() []AccountContext {
22
22
var combined []AccountContext
23
-
for _, c := range ac.Celebs {
24
-
combined = append(combined, c)
25
-
}
26
-
for _, r := range ac.Regulars {
27
-
combined = append(combined, r)
28
-
}
23
+
combined = append(combined, ac.Celebs...)
24
+
combined = append(combined, ac.Regulars...)
29
25
return combined
30
26
}
31
27
···
72
68
return nil, fmt.Errorf("account index didn't match: %d != %d (%s)", i, u.Index, u.AccountType)
73
69
}
74
70
}
75
-
log.Infof("loaded account catalog: regular=%d celebrity=%d", len(catalog.Regulars), len(catalog.Celebs))
71
+
log.Info("loaded account catalog", "regular", len(catalog.Regulars), "celebrity", len(catalog.Celebs))
76
72
return catalog, nil
77
73
}
78
74
+8
-4
fakedata/generators.go
···
7
7
"bytes"
8
8
"context"
9
9
"fmt"
10
+
"log/slog"
10
11
"math/rand"
11
12
"time"
12
13
···
16
17
"github.com/bluesky-social/indigo/xrpc"
17
18
18
19
"github.com/brianvoe/gofakeit/v6"
19
-
logging "github.com/ipfs/go-log"
20
20
)
21
21
22
-
var log = logging.Logger("fakedata")
22
+
var log = slog.Default().With("system", "fakedata")
23
+
24
+
func SetLogger(logger *slog.Logger) {
25
+
log = logger
26
+
}
23
27
24
28
func MeasureIterations(name string) func(int) {
25
29
start := time.Now()
···
28
32
return
29
33
}
30
34
total := time.Since(start)
31
-
log.Infof("%s wall runtime: count=%d total=%s mean=%s", name, count, total, total/time.Duration(count))
35
+
log.Info("wall runtime", "name", name, "count", count, "total", total, "rate", total/time.Duration(count))
32
36
}
33
37
}
34
38
···
386
390
func BrowseAccount(xrpcc *xrpc.Client, acc *AccountContext) error {
387
391
// fetch notifications
388
392
maxNotif := 50
389
-
resp, err := appbsky.NotificationListNotifications(context.TODO(), xrpcc, "", int64(maxNotif), false, "")
393
+
resp, err := appbsky.NotificationListNotifications(context.TODO(), xrpcc, "", int64(maxNotif), false, nil, "")
390
394
if err != nil {
391
395
return err
392
396
}
+21
-10
go.mod
···
1
1
module github.com/bluesky-social/indigo
2
2
3
-
go 1.22
3
+
go 1.23
4
4
5
5
require (
6
6
contrib.go.opencensus.io/exporter/prometheus v0.4.2
···
8
8
github.com/RussellLuo/slidingwindow v0.0.0-20200528002341-535bb99d338b
9
9
github.com/adrg/xdg v0.5.0
10
10
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de
11
+
github.com/bradfitz/gomemcache v0.0.0-20230905024940-24af94b03874
11
12
github.com/brianvoe/gofakeit/v6 v6.25.0
12
13
github.com/carlmjohnson/versioninfo v0.22.5
14
+
github.com/cockroachdb/pebble v1.1.2
13
15
github.com/dustinkirkland/golang-petname v0.0.0-20231002161417-6a283f1aaaf2
14
16
github.com/flosch/pongo2/v6 v6.0.0
15
17
github.com/go-redis/cache/v9 v9.0.0
16
18
github.com/goccy/go-json v0.10.2
17
-
github.com/gocql/gocql v1.7.0
18
19
github.com/golang-jwt/jwt v3.2.2+incompatible
19
20
github.com/gorilla/websocket v1.5.1
20
21
github.com/hashicorp/go-retryablehttp v0.7.5
···
30
31
github.com/ipfs/go-ipld-cbor v0.1.0
31
32
github.com/ipfs/go-ipld-format v0.6.0
32
33
github.com/ipfs/go-libipfs v0.7.0
33
-
github.com/ipfs/go-log v1.0.5
34
34
github.com/ipfs/go-log/v2 v2.5.1
35
35
github.com/ipld/go-car v0.6.1-0.20230509095817-92d28eb23ba4
36
36
github.com/ipld/go-car/v2 v2.13.1
···
56
56
github.com/urfave/cli/v2 v2.25.7
57
57
github.com/whyrusleeping/cbor-gen v0.2.1-0.20241030202151-b7a6831be65e
58
58
github.com/whyrusleeping/go-did v0.0.0-20230824162731-404d1707d5d6
59
+
github.com/xlab/treeprint v1.2.0
59
60
gitlab.com/yawning/secp256k1-voi v0.0.0-20230925100816-f2616030848b
60
61
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.1
61
62
go.opentelemetry.io/otel v1.21.0
···
65
66
go.opentelemetry.io/otel/sdk v1.21.0
66
67
go.opentelemetry.io/otel/trace v1.21.0
67
68
go.uber.org/automaxprocs v1.5.3
69
+
go.uber.org/zap v1.26.0
68
70
golang.org/x/crypto v0.21.0
69
-
golang.org/x/sync v0.5.0
71
+
golang.org/x/sync v0.7.0
70
72
golang.org/x/text v0.14.0
71
73
golang.org/x/time v0.3.0
72
74
golang.org/x/tools v0.15.0
···
78
80
)
79
81
80
82
require (
83
+
github.com/DataDog/zstd v1.4.5 // indirect
84
+
github.com/cockroachdb/errors v1.11.3 // indirect
85
+
github.com/cockroachdb/fifo v0.0.0-20240606204812-0bbfbd93a7ce // indirect
86
+
github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b // indirect
87
+
github.com/cockroachdb/redact v1.1.5 // indirect
88
+
github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 // indirect
81
89
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
90
+
github.com/getsentry/sentry-go v0.27.0 // indirect
82
91
github.com/go-redis/redis v6.15.9+incompatible // indirect
83
-
github.com/golang/snappy v0.0.3 // indirect
84
-
github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed // indirect
92
+
github.com/golang/snappy v0.0.4 // indirect
85
93
github.com/hashicorp/golang-lru v1.0.2 // indirect
94
+
github.com/ipfs/go-log v1.0.5 // indirect
86
95
github.com/jackc/puddle/v2 v2.2.1 // indirect
87
96
github.com/klauspost/compress v1.17.3 // indirect
97
+
github.com/kr/pretty v0.3.1 // indirect
98
+
github.com/kr/text v0.2.0 // indirect
88
99
github.com/labstack/gommon v0.4.1 // indirect
89
100
github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect
90
101
github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 // indirect
102
+
github.com/pkg/errors v0.9.1 // indirect
103
+
github.com/rogpeppe/go-internal v1.10.0 // indirect
91
104
github.com/vmihailenco/go-tinylfu v0.2.2 // indirect
92
105
github.com/vmihailenco/msgpack/v5 v5.4.1 // indirect
93
106
github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect
94
107
github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 // indirect
95
-
go.uber.org/zap v1.26.0 // indirect
96
108
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa // indirect
97
-
gopkg.in/inf.v0 v0.9.1 // indirect
98
109
)
99
110
100
111
require (
···
167
178
go.uber.org/atomic v1.11.0 // indirect
168
179
go.uber.org/multierr v1.11.0 // indirect
169
180
golang.org/x/mod v0.14.0 // indirect
170
-
golang.org/x/net v0.21.0 // indirect
181
+
golang.org/x/net v0.23.0 // indirect
171
182
golang.org/x/sys v0.22.0 // indirect
172
183
google.golang.org/genproto/googleapis/api v0.0.0-20231120223509-83a465c0220f // indirect
173
184
google.golang.org/genproto/googleapis/rpc v0.0.0-20231120223509-83a465c0220f // indirect
174
185
google.golang.org/grpc v1.59.0 // indirect
175
-
google.golang.org/protobuf v1.31.0 // indirect
186
+
google.golang.org/protobuf v1.33.0 // indirect
176
187
gopkg.in/yaml.v2 v2.4.0 // indirect
177
188
gopkg.in/yaml.v3 v3.0.1 // indirect
178
189
lukechampine.com/blake3 v1.2.1 // indirect
+37
-18
go.sum
···
35
35
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
36
36
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
37
37
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
38
+
github.com/DataDog/zstd v1.4.5 h1:EndNeuB0l9syBZhut0wns3gV1hL8zX8LIu6ZiVHWLIQ=
39
+
github.com/DataDog/zstd v1.4.5/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo=
38
40
github.com/PuerkitoBio/purell v1.2.1 h1:QsZ4TjvwiMpat6gBCBxEQI0rcS9ehtkKtSpiUnd9N28=
39
41
github.com/PuerkitoBio/purell v1.2.1/go.mod h1:ZwHcC/82TOaovDi//J/804umJFFmbOHPngi8iYYv/Eo=
40
42
github.com/RussellLuo/slidingwindow v0.0.0-20200528002341-535bb99d338b h1:5/++qT1/z812ZqBvqQt6ToRswSuPZ/B33m6xVHRzADU=
···
71
73
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
72
74
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
73
75
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
74
-
github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932 h1:mXoPYz/Ul5HYEDvkta6I8/rnYM5gSdSV2tJ6XbZuEtY=
75
-
github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932/go.mod h1:NOuUCSz6Q9T7+igc/hlvDOUdtWKryOrtFyIVABv/p7k=
76
-
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY=
77
-
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4=
76
+
github.com/bradfitz/gomemcache v0.0.0-20230905024940-24af94b03874 h1:N7oVaKyGp8bttX0bfZGmcGkjz7DLQXhAn3DNd3T0ous=
77
+
github.com/bradfitz/gomemcache v0.0.0-20230905024940-24af94b03874/go.mod h1:r5xuitiExdLAJ09PR7vBVENGvp4ZuTBeWTGtxuX3K+c=
78
78
github.com/brianvoe/gofakeit/v6 v6.25.0 h1:ZpFjktOpLZUeF8q223o0rUuXtA+m5qW5srjvVi+JkXk=
79
79
github.com/brianvoe/gofakeit/v6 v6.25.0/go.mod h1:Xj58BMSnFqcn/fAQeSK+/PLtC5kSb7FJIq4JyGa8vEs=
80
80
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
···
95
95
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
96
96
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
97
97
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
98
+
github.com/cockroachdb/datadriven v1.0.3-0.20230413201302-be42291fc80f h1:otljaYPt5hWxV3MUfO5dFPFiOXg9CyG5/kCfayTqsJ4=
99
+
github.com/cockroachdb/datadriven v1.0.3-0.20230413201302-be42291fc80f/go.mod h1:a9RdTaap04u637JoCzcUoIcDmvwSUtcUFtT/C3kJlTU=
100
+
github.com/cockroachdb/errors v1.11.3 h1:5bA+k2Y6r+oz/6Z/RFlNeVCesGARKuC6YymtcDrbC/I=
101
+
github.com/cockroachdb/errors v1.11.3/go.mod h1:m4UIW4CDjx+R5cybPsNrRbreomiFqt8o1h1wUVazSd8=
102
+
github.com/cockroachdb/fifo v0.0.0-20240606204812-0bbfbd93a7ce h1:giXvy4KSc/6g/esnpM7Geqxka4WSqI1SZc7sMJFd3y4=
103
+
github.com/cockroachdb/fifo v0.0.0-20240606204812-0bbfbd93a7ce/go.mod h1:9/y3cnZ5GKakj/H4y9r9GTjCvAFta7KLgSHPJJYc52M=
104
+
github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b h1:r6VH0faHjZeQy818SGhaone5OnYfxFR/+AzdY3sf5aE=
105
+
github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b/go.mod h1:Vz9DsVWQQhf3vs21MhPMZpMGSht7O/2vFW2xusFUVOs=
106
+
github.com/cockroachdb/pebble v1.1.2 h1:CUh2IPtR4swHlEj48Rhfzw6l/d0qA31fItcIszQVIsA=
107
+
github.com/cockroachdb/pebble v1.1.2/go.mod h1:4exszw1r40423ZsmkG/09AFEG83I0uDgfujJdbL6kYU=
108
+
github.com/cockroachdb/redact v1.1.5 h1:u1PMllDkdFfPWaNGMyLD1+so+aq3uUItthCFqzwPJ30=
109
+
github.com/cockroachdb/redact v1.1.5/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg=
110
+
github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 h1:zuQyyAKVxetITBuuhv3BI9cMrmStnpT18zmgmTxunpo=
111
+
github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06/go.mod h1:7nc4anLGjupUW/PeY5qiNYsdNXj7zopG+eqsS7To5IQ=
98
112
github.com/corpix/uarand v0.2.0 h1:U98xXwud/AVuCpkpgfPF7J5TQgr7R5tqT8VZP5KWbzE=
99
113
github.com/corpix/uarand v0.2.0/go.mod h1:/3Z1QIqWkDIhf6XWn/08/uMHoQ8JUoTIKc2iPchBOmM=
100
114
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
···
126
140
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
127
141
github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
128
142
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
143
+
github.com/getsentry/sentry-go v0.27.0 h1:Pv98CIbtB3LkMWmXi4Joa5OOcwbmnX88sF5qbK3r3Ps=
144
+
github.com/getsentry/sentry-go v0.27.0/go.mod h1:lc76E2QywIyW8WuBnwl8Lc4bkmQH4+w1gwTf25trprY=
145
+
github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA=
146
+
github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og=
129
147
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
130
148
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
131
149
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
···
156
174
github.com/go-yaml/yaml v2.1.0+incompatible/go.mod h1:w2MrLa16VYP0jy6N7M5kHaCkaLENm+P+Tv+MfurjSw0=
157
175
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
158
176
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
159
-
github.com/gocql/gocql v1.7.0 h1:O+7U7/1gSN7QTEAaMEsJc1Oq2QHXvCWoF3DFK9HDHus=
160
-
github.com/gocql/gocql v1.7.0/go.mod h1:vnlvXyFZeLBF0Wy+RS8hrOdbn0UWsWtdg07XJnFxZ+4=
161
177
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
162
178
github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4=
163
179
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
···
195
211
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
196
212
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
197
213
github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
198
-
github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA=
199
-
github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
214
+
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
215
+
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
200
216
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
201
217
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
202
218
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
···
239
255
github.com/gorilla/websocket v1.5.1/go.mod h1:x3kM2JMyaluk02fnUJpQuwD2dCS5NDG2ZHL0uE0tcaY=
240
256
github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1 h1:6UKoz5ujsI55KNpsJH3UwCq3T8kKbZwNZBNPuTTje8U=
241
257
github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1/go.mod h1:YvJ2f6MplWDhfxiUC3KpyTy76kYUZA4W3pTv/wdKQ9Y=
242
-
github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed h1:5upAirOpQc1Q53c0bnx2ufif5kANL7bfZWcc6VJWJd8=
243
-
github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed/go.mod h1:tMWxXQ9wFIaZeTI9F+hmhFiGpFmhOHzyShyFUhRm0H4=
244
258
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
245
259
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
246
260
github.com/hashicorp/go-hclog v0.9.2 h1:CG6TE5H9/JXsFWJCfoIVpKFIkFe6ysEuHirp4DxCsHI=
···
512
526
github.com/orandin/slog-gorm v1.3.2/go.mod h1:MoZ51+b7xE9lwGNPYEhxcUtRNrYzjdcKvA8QXQQGEPA=
513
527
github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 h1:1/WtZae0yGtPq+TI6+Tv1WTxkukpXeMlviSxvL7SRgk=
514
528
github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9/go.mod h1:x3N5drFsm2uilKKuuYo6LdyD8vZAW55sH/9w+pbo1sw=
529
+
github.com/pingcap/errors v0.11.4 h1:lFuQV/oaUMGcD2tqt+01ROSmJs75VG1ToEOkZIZ4nE4=
530
+
github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8=
531
+
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
515
532
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
516
533
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
534
+
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
517
535
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
518
536
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
519
537
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
···
564
582
github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
565
583
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
566
584
github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
585
+
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
567
586
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
568
587
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
569
588
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
···
632
651
github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f/go.mod h1:p9UJB6dDgdPgMJZs7UjUOdulKyRr9fqkS+6JKAInPy8=
633
652
github.com/whyrusleeping/go-did v0.0.0-20230824162731-404d1707d5d6 h1:yJ9/LwIGIk/c0CdoavpC9RNSGSruIspSZtxG3Nnldic=
634
653
github.com/whyrusleeping/go-did v0.0.0-20230824162731-404d1707d5d6/go.mod h1:39U9RRVr4CKbXpXYopWn+FSH5s+vWu6+RmguSPWAq5s=
654
+
github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ=
655
+
github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0=
635
656
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
636
657
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
637
658
github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
···
795
816
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
796
817
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
797
818
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
798
-
golang.org/x/net v0.21.0 h1:AQyQV4dYCvJ7vGmJyKki9+PBdyvhkSd8EIx/qb0AYv4=
799
-
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
819
+
golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs=
820
+
golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
800
821
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
801
822
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
802
823
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
···
818
839
golang.org/x/sync v0.0.0-20220601150217-0de741cfad7f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
819
840
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
820
841
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
821
-
golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE=
822
-
golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
842
+
golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
843
+
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
823
844
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
824
845
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
825
846
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
···
1061
1082
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
1062
1083
google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
1063
1084
google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
1064
-
google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
1065
-
google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
1085
+
google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
1086
+
google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
1066
1087
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
1067
1088
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
1068
1089
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
···
1071
1092
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
1072
1093
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
1073
1094
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
1074
-
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
1075
-
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
1076
1095
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
1077
1096
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
1078
1097
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+40
-5
indexer/crawler.go
···
3
3
import (
4
4
"context"
5
5
"fmt"
6
+
"log/slog"
6
7
"sync"
8
+
"time"
7
9
8
10
comatproto "github.com/bluesky-social/indigo/api/atproto"
9
11
"github.com/bluesky-social/indigo/models"
···
27
29
doRepoCrawl func(context.Context, *crawlWork) error
28
30
29
31
concurrency int
32
+
33
+
log *slog.Logger
34
+
35
+
done chan struct{}
30
36
}
31
37
32
-
func NewCrawlDispatcher(repoFn func(context.Context, *crawlWork) error, concurrency int) (*CrawlDispatcher, error) {
38
+
func NewCrawlDispatcher(repoFn func(context.Context, *crawlWork) error, concurrency int, log *slog.Logger) (*CrawlDispatcher, error) {
33
39
if concurrency < 1 {
34
40
return nil, fmt.Errorf("must specify a non-zero positive integer for crawl dispatcher concurrency")
35
41
}
36
42
37
-
return &CrawlDispatcher{
43
+
out := &CrawlDispatcher{
38
44
ingest: make(chan *models.ActorInfo),
39
45
repoSync: make(chan *crawlWork),
40
46
complete: make(chan models.Uid),
···
43
49
concurrency: concurrency,
44
50
todo: make(map[models.Uid]*crawlWork),
45
51
inProgress: make(map[models.Uid]*crawlWork),
46
-
}, nil
52
+
log: log,
53
+
done: make(chan struct{}),
54
+
}
55
+
go out.CatchupRepoGaugePoller()
56
+
57
+
return out, nil
47
58
}
48
59
49
60
func (c *CrawlDispatcher) Run() {
···
52
63
for i := 0; i < c.concurrency; i++ {
53
64
go c.fetchWorker()
54
65
}
66
+
}
67
+
68
+
func (c *CrawlDispatcher) Shutdown() {
69
+
close(c.done)
55
70
}
56
71
57
72
type catchupJob struct {
···
173
188
}
174
189
175
190
func (c *CrawlDispatcher) addToCatchupQueue(catchup *catchupJob) *crawlWork {
176
-
catchupEventsEnqueued.Inc()
177
191
c.maplk.Lock()
178
192
defer c.maplk.Unlock()
179
193
180
194
// If the actor crawl is enqueued, we can append to the catchup queue which gets emptied during the crawl
181
195
job, ok := c.todo[catchup.user.Uid]
182
196
if ok {
197
+
catchupEventsEnqueued.WithLabelValues("todo").Inc()
183
198
job.catchup = append(job.catchup, catchup)
184
199
return nil
185
200
}
···
187
202
// If the actor crawl is in progress, we can append to the next queue which gets emptied after the crawl
188
203
job, ok = c.inProgress[catchup.user.Uid]
189
204
if ok {
205
+
catchupEventsEnqueued.WithLabelValues("prog").Inc()
190
206
job.next = append(job.next, catchup)
191
207
return nil
192
208
}
193
209
210
+
catchupEventsEnqueued.WithLabelValues("new").Inc()
194
211
// Otherwise, we need to create a new crawl job for this actor and enqueue it
195
212
cw := &crawlWork{
196
213
act: catchup.user,
···
205
222
select {
206
223
case job := <-c.repoSync:
207
224
if err := c.doRepoCrawl(context.TODO(), job); err != nil {
208
-
log.Errorf("failed to perform repo crawl of %q: %s", job.act.Did, err)
225
+
c.log.Error("failed to perform repo crawl", "did", job.act.Did, "err", err)
209
226
}
210
227
211
228
// TODO: do we still just do this if it errors?
···
269
286
270
287
return false
271
288
}
289
+
290
+
func (c *CrawlDispatcher) countReposInSlowPath() int {
291
+
c.maplk.Lock()
292
+
defer c.maplk.Unlock()
293
+
return len(c.inProgress) + len(c.todo)
294
+
}
295
+
296
+
func (c *CrawlDispatcher) CatchupRepoGaugePoller() {
297
+
ticker := time.NewTicker(30 * time.Second)
298
+
defer ticker.Stop()
299
+
for {
300
+
select {
301
+
case <-c.done:
return
302
+
case <-ticker.C:
303
+
catchupReposGauge.Set(float64(c.countReposInSlowPath()))
304
+
}
305
+
}
306
+
}
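The new `done` channel, `Shutdown`, and `CatchupRepoGaugePoller` above add a background goroutine that samples a gauge on a ticker until the dispatcher is shut down. A minimal self-contained sketch of that pattern (the poller type and interval here are illustrative, not from the indigo codebase):

```go
package main

import (
	"fmt"
	"time"
)

// poller samples a value on every tick until done is closed.
type poller struct {
	done chan struct{}
}

func newPoller() *poller {
	p := &poller{done: make(chan struct{})}
	go p.run(50 * time.Millisecond)
	return p
}

func (p *poller) run(interval time.Duration) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		select {
		case <-p.done:
			// returning here lets the goroutine exit once Shutdown closes done
			return
		case <-ticker.C:
			fmt.Println("sample:", time.Now().UnixMilli())
		}
	}
}

// Shutdown is safe to call once; closing the channel wakes the poller immediately.
func (p *poller) Shutdown() { close(p.done) }

func main() {
	p := newPoller()
	time.Sleep(200 * time.Millisecond)
	p.Shutdown()
}
```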
+38
-30
indexer/indexer.go
···
5
5
"database/sql"
6
6
"errors"
7
7
"fmt"
8
+
"log/slog"
8
9
"time"
9
10
10
11
comatproto "github.com/bluesky-social/indigo/api/atproto"
···
19
20
"github.com/bluesky-social/indigo/xrpc"
20
21
21
22
"github.com/ipfs/go-cid"
22
-
logging "github.com/ipfs/go-log"
23
23
"go.opentelemetry.io/otel"
24
24
"gorm.io/gorm"
25
25
"gorm.io/gorm/clause"
26
26
)
27
27
28
-
var log = logging.Logger("indexer")
29
-
30
28
const MaxEventSliceLength = 1000000
31
29
const MaxOpsSliceLength = 200
32
30
···
45
43
SendRemoteFollow func(context.Context, string, uint) error
46
44
CreateExternalUser func(context.Context, string) (*models.ActorInfo, error)
47
45
ApplyPDSClientSettings func(*xrpc.Client)
46
+
47
+
log *slog.Logger
48
48
}
49
49
50
50
func NewIndexer(db *gorm.DB, notifman notifs.NotificationManager, evtman *events.EventManager, didr did.Resolver, fetcher *RepoFetcher, crawl, aggregate, spider bool) (*Indexer, error) {
···
65
65
return nil
66
66
},
67
67
ApplyPDSClientSettings: func(*xrpc.Client) {},
68
+
log: slog.Default().With("system", "indexer"),
68
69
}
69
70
70
71
if crawl {
71
-
c, err := NewCrawlDispatcher(fetcher.FetchAndIndexRepo, fetcher.MaxConcurrency)
72
+
c, err := NewCrawlDispatcher(fetcher.FetchAndIndexRepo, fetcher.MaxConcurrency, ix.log)
72
73
if err != nil {
73
74
return nil, err
74
75
}
···
80
81
return ix, nil
81
82
}
82
83
84
+
func (ix *Indexer) Shutdown() {
85
+
if ix.Crawler != nil {
86
+
ix.Crawler.Shutdown()
87
+
}
88
+
}
89
+
83
90
func (ix *Indexer) HandleRepoEvent(ctx context.Context, evt *repomgr.RepoEvent) error {
84
91
ctx, span := otel.Tracer("indexer").Start(ctx, "HandleRepoEvent")
85
92
defer span.End()
86
93
87
-
log.Debugw("Handling Repo Event!", "uid", evt.User)
94
+
ix.log.Debug("Handling Repo Event!", "uid", evt.User)
88
95
89
96
outops := make([]*comatproto.SyncSubscribeRepos_RepoOp, 0, len(evt.Ops))
90
97
for _, op := range evt.Ops {
···
96
103
})
97
104
98
105
if err := ix.handleRepoOp(ctx, evt, &op); err != nil {
99
-
log.Errorw("failed to handle repo op", "err", err)
106
+
ix.log.Error("failed to handle repo op", "err", err)
100
107
}
101
108
}
102
109
···
113
120
toobig = true
114
121
}
115
122
116
-
log.Debugw("Sending event", "did", did)
123
+
ix.log.Debug("Sending event", "did", did)
117
124
if err := ix.events.AddEvent(ctx, &events.XRPCStreamEvent{
118
125
RepoCommit: &comatproto.SyncSubscribeRepos_Commit{
119
126
Repo: did,
···
191
198
if e.Type == "mention" {
192
199
_, err := ix.GetUserOrMissing(ctx, e.Value)
193
200
if err != nil {
194
-
log.Infow("failed to parse user mention", "ref", e.Value, "err", err)
201
+
ix.log.Info("failed to parse user mention", "ref", e.Value, "err", err)
195
202
}
196
203
}
197
204
}
···
199
206
if rec.Reply != nil {
200
207
if rec.Reply.Parent != nil {
201
208
if err := ix.crawlAtUriRef(ctx, rec.Reply.Parent.Uri); err != nil {
202
-
log.Infow("failed to crawl reply parent", "cid", op.RecCid, "replyuri", rec.Reply.Parent.Uri, "err", err)
209
+
ix.log.Info("failed to crawl reply parent", "cid", op.RecCid, "replyuri", rec.Reply.Parent.Uri, "err", err)
203
210
}
204
211
}
205
212
206
213
if rec.Reply.Root != nil {
207
214
if err := ix.crawlAtUriRef(ctx, rec.Reply.Root.Uri); err != nil {
208
-
log.Infow("failed to crawl reply root", "cid", op.RecCid, "rooturi", rec.Reply.Root.Uri, "err", err)
215
+
ix.log.Info("failed to crawl reply root", "cid", op.RecCid, "rooturi", rec.Reply.Root.Uri, "err", err)
209
216
}
210
217
}
211
218
}
···
214
221
case *bsky.FeedRepost:
215
222
if rec.Subject != nil {
216
223
if err := ix.crawlAtUriRef(ctx, rec.Subject.Uri); err != nil {
217
-
log.Infow("failed to crawl repost subject", "cid", op.RecCid, "subjecturi", rec.Subject.Uri, "err", err)
224
+
ix.log.Info("failed to crawl repost subject", "cid", op.RecCid, "subjecturi", rec.Subject.Uri, "err", err)
218
225
}
219
226
}
220
227
return nil
221
228
case *bsky.FeedLike:
222
229
if rec.Subject != nil {
223
230
if err := ix.crawlAtUriRef(ctx, rec.Subject.Uri); err != nil {
224
-
log.Infow("failed to crawl like subject", "cid", op.RecCid, "subjecturi", rec.Subject.Uri, "err", err)
231
+
ix.log.Info("failed to crawl like subject", "cid", op.RecCid, "subjecturi", rec.Subject.Uri, "err", err)
225
232
}
226
233
}
227
234
return nil
228
235
case *bsky.GraphFollow:
229
236
_, err := ix.GetUserOrMissing(ctx, rec.Subject)
230
237
if err != nil {
231
-
log.Infow("failed to crawl follow subject", "cid", op.RecCid, "subjectdid", rec.Subject, "err", err)
238
+
ix.log.Info("failed to crawl follow subject", "cid", op.RecCid, "subjectdid", rec.Subject, "err", err)
232
239
}
233
240
return nil
234
241
case *bsky.GraphBlock:
235
242
_, err := ix.GetUserOrMissing(ctx, rec.Subject)
236
243
if err != nil {
237
-
log.Infow("failed to crawl follow subject", "cid", op.RecCid, "subjectdid", rec.Subject, "err", err)
244
+
ix.log.Info("failed to crawl follow subject", "cid", op.RecCid, "subjectdid", rec.Subject, "err", err)
238
245
}
239
246
return nil
240
247
case *bsky.ActorProfile:
···
246
253
case *bsky.FeedGenerator:
247
254
return nil
248
255
default:
249
-
log.Warnw("unrecognized record type (crawling references)", "record", op.Record, "collection", op.Collection)
256
+
ix.log.Warn("unrecognized record type (crawling references)", "record", op.Record, "collection", op.Collection)
250
257
return nil
251
258
}
252
259
}
···
287
294
}
288
295
289
296
func (ix *Indexer) addUserToCrawler(ctx context.Context, ai *models.ActorInfo) error {
290
-
log.Debugw("Sending user to crawler: ", "did", ai.Did)
297
+
ix.log.Debug("Sending user to crawler: ", "did", ai.Did)
291
298
if ix.Crawler == nil {
292
299
return nil
293
300
}
···
389
396
}
390
397
391
398
func (ix *Indexer) handleRecordDelete(ctx context.Context, evt *repomgr.RepoEvent, op *repomgr.RepoOp, local bool) error {
392
-
log.Debugw("record delete event", "collection", op.Collection)
399
+
ix.log.Debug("record delete event", "collection", op.Collection)
393
400
394
401
switch op.Collection {
395
402
case "app.bsky.feed.post":
···
405
412
fp, err := ix.GetPost(ctx, uri)
406
413
if err != nil {
407
414
if errors.Is(err, gorm.ErrRecordNotFound) {
408
-
log.Warnw("deleting post weve never seen before. Weird.", "user", evt.User, "rkey", op.Rkey)
415
+
ix.log.Warn("deleting post weve never seen before. Weird.", "user", evt.User, "rkey", op.Rkey)
409
416
return nil
410
417
}
411
418
return err
···
419
426
return err
420
427
}
421
428
422
-
log.Warn("TODO: remove notifications on delete")
429
+
ix.log.Warn("TODO: remove notifications on delete")
423
430
/*
424
431
if err := ix.notifman.RemoveRepost(ctx, fp.Author, rr.ID, evt.User); err != nil {
425
432
return nil, err
···
460
467
return err
461
468
}
462
469
463
-
log.Warnf("need to delete vote notification")
470
+
ix.log.Warn("need to delete vote notification")
464
471
return nil
465
472
}
466
473
···
471
478
}
472
479
473
480
if q.RowsAffected == 0 {
474
-
log.Warnw("attempted to delete follow we did not have a record for", "user", evt.User, "rkey", op.Rkey)
481
+
ix.log.Warn("attempted to delete follow we did not have a record for", "user", evt.User, "rkey", op.Rkey)
475
482
return nil
476
483
}
477
484
···
479
486
}
480
487
481
488
func (ix *Indexer) handleRecordCreate(ctx context.Context, evt *repomgr.RepoEvent, op *repomgr.RepoOp, local bool) ([]uint, error) {
482
-
log.Debugw("record create event", "collection", op.Collection)
489
+
ix.log.Debug("record create event", "collection", op.Collection)
483
490
484
491
var out []uint
485
492
switch rec := op.Record.(type) {
···
529
536
case *bsky.FeedGenerator:
530
537
return out, nil
531
538
case *bsky.ActorProfile:
532
-
log.Debugf("TODO: got actor profile record creation, need to do something with this")
539
+
ix.log.Debug("TODO: got actor profile record creation, need to do something with this")
533
540
default:
541
+
ix.log.Warn("unrecognized record", "record", op.Record, "collection", op.Collection)
534
542
return nil, fmt.Errorf("unrecognized record type (creation): %s", op.Collection)
535
543
}
536
544
···
603
611
}
604
612
605
613
func (ix *Indexer) handleRecordUpdate(ctx context.Context, evt *repomgr.RepoEvent, op *repomgr.RepoOp, local bool) error {
606
-
log.Debugw("record update event", "collection", op.Collection)
614
+
ix.log.Debug("record update event", "collection", op.Collection)
607
615
608
616
switch rec := op.Record.(type) {
609
617
case *bsky.FeedPost:
···
623
631
624
632
if oldReply != newReply {
625
633
// the 'replyness' of the post was changed... that's weird
626
-
log.Errorf("need to properly handle case where reply-ness of posts is changed")
634
+
ix.log.Error("need to properly handle case where reply-ness of posts is changed")
627
635
return nil
628
636
}
629
637
···
634
642
}
635
643
636
644
if replyto.ID != fp.ReplyTo {
637
-
log.Errorf("post was changed to be a reply to a different post")
645
+
ix.log.Error("post was changed to be a reply to a different post")
638
646
return nil
639
647
}
640
648
}
···
687
695
688
696
return ix.handleRecordCreateGraphFollow(ctx, rec, evt, op)
689
697
case *bsky.ActorProfile:
690
-
log.Debugf("TODO: got actor profile record update, need to do something with this")
698
+
ix.log.Debug("TODO: got actor profile record update, need to do something with this")
691
699
default:
692
700
return fmt.Errorf("unrecognized record type (update): %s", op.Collection)
693
701
}
···
761
769
// we're likely filling in a missing reference
762
770
if !maybe.Missing {
763
771
// TODO: we've already processed this record creation
764
-
log.Warnw("potentially erroneous event, duplicate create", "rkey", rkey, "user", user)
772
+
ix.log.Warn("potentially erroneous event, duplicate create", "rkey", rkey, "user", user)
765
773
}
766
774
767
775
if err := ix.db.Clauses(clause.OnConflict{
···
785
793
}
786
794
787
795
func (ix *Indexer) createMissingPostRecord(ctx context.Context, puri *util.ParsedUri) (*models.FeedPost, error) {
788
-
log.Warn("creating missing post record")
796
+
ix.log.Warn("creating missing post record")
789
797
ai, err := ix.GetUserOrMissing(ctx, puri.Did)
790
798
if err != nil {
791
799
return nil, err
···
807
815
if post.Reply != nil {
808
816
replyto, err := ix.GetPost(ctx, post.Reply.Parent.Uri)
809
817
if err != nil {
810
-
log.Error("probably shouldn't error when processing a reply to a not-found post")
818
+
ix.log.Error("probably shouldn't error when processing a reply to a not-found post")
811
819
return err
812
820
}
813
821
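These indexer hunks replace the package-level ipfs/go-log logger with a per-struct `*slog.Logger` tagged with a `system` attribute, and convert the `Debugw`/`Errorw` key-value calls to slog's `Debug`/`Error`. A stripped-down sketch of that migration pattern, using a hypothetical minimal stand-in for the real Indexer:

```go
package main

import "log/slog"

type Indexer struct {
	log *slog.Logger
}

func NewIndexer() *Indexer {
	// Attach a stable "system" attribute so every log line from this
	// component is tagged, mirroring slog.Default().With("system", "indexer").
	return &Indexer{log: slog.Default().With("system", "indexer")}
}

func (ix *Indexer) handle(uid uint64) {
	// slog takes alternating key/value pairs, like the old Debugw/Errorw calls.
	ix.log.Debug("Handling Repo Event!", "uid", uid)
	if err := doWork(uid); err != nil {
		ix.log.Error("failed to handle repo op", "err", err)
	}
}

func doWork(uid uint64) error { return nil } // placeholder

func main() { NewIndexer().handle(1) }
```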
+5
-1
indexer/keymgr.go
···
3
3
import (
4
4
"context"
5
5
"fmt"
6
+
"log/slog"
6
7
7
8
did "github.com/whyrusleeping/go-did"
8
9
"go.opentelemetry.io/otel"
···
12
13
didr DidResolver
13
14
14
15
signingKey *did.PrivKey
16
+
17
+
log *slog.Logger
15
18
}
16
19
17
20
type DidResolver interface {
···
22
25
return &KeyManager{
23
26
didr: didr,
24
27
signingKey: k,
28
+
log: slog.Default().With("system", "indexer"),
25
29
}
26
30
}
27
31
···
36
40
37
41
err = k.Verify(msg, sig)
38
42
if err != nil {
39
-
log.Warnw("signature failed to verify", "err", err, "did", did, "pubKey", k, "sigBytes", sig, "msgBytes", msg)
43
+
km.log.Warn("signature failed to verify", "err", err, "did", did, "pubKey", k, "sigBytes", sig, "msgBytes", msg)
40
44
}
41
45
return err
42
46
}
+12
-2
indexer/metrics.go
···
25
25
Help: "Number of repos fetched",
26
26
}, []string{"status"})
27
27
28
-
var catchupEventsEnqueued = promauto.NewCounter(prometheus.CounterOpts{
28
+
var catchupEventsEnqueued = promauto.NewCounterVec(prometheus.CounterOpts{
29
29
Name: "indexer_catchup_events_enqueued",
30
30
Help: "Number of catchup events enqueued",
31
-
})
31
+
}, []string{"how"})
32
32
33
33
var catchupEventsProcessed = promauto.NewCounter(prometheus.CounterOpts{
34
34
Name: "indexer_catchup_events_processed",
35
35
Help: "Number of catchup events processed",
36
36
})
37
+
38
+
var catchupEventsFailed = promauto.NewCounterVec(prometheus.CounterOpts{
39
+
Name: "indexer_catchup_events_failed",
40
+
Help: "Number of catchup events processed",
41
+
}, []string{"err"})
42
+
43
+
var catchupReposGauge = promauto.NewGauge(prometheus.GaugeOpts{
44
+
Name: "indexer_catchup_repos",
45
+
Help: "Number of repos waiting on catchup",
46
+
})
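`catchupEventsEnqueued` changes from a plain counter to a `CounterVec` keyed by a `how` label, so the crawler hunks above can distinguish the `todo`, `prog`, and `new` enqueue paths. A minimal sketch of the Counter-to-CounterVec pattern with the prometheus client (the metric name here is illustrative):

```go
package main

import (
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

// A labeled counter: one time series per distinct label value.
var eventsEnqueued = promauto.NewCounterVec(prometheus.CounterOpts{
	Name: "example_events_enqueued",
	Help: "Number of events enqueued, by path taken",
}, []string{"how"})

func main() {
	// WithLabelValues picks (and lazily creates) the series for that label value.
	eventsEnqueued.WithLabelValues("todo").Inc()
	eventsEnqueued.WithLabelValues("new").Inc()
}
```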
+2
-1
indexer/posts_test.go
···
50
50
t.Fatal(err)
51
51
}
52
52
53
-
cs, err := carstore.NewCarStore(cardb, cspath)
53
+
cs, err := carstore.NewCarStore(cardb, []string{cspath})
54
54
if err != nil {
55
55
t.Fatal(err)
56
56
}
···
81
81
if ix.dir != "" {
82
82
_ = os.RemoveAll(ix.dir)
83
83
}
84
+
ix.ix.Shutdown()
84
85
}
85
86
86
87
// TODO: dedupe this out into some testing utility package
+11
-5
indexer/repofetch.go
···
7
7
"fmt"
8
8
"io"
9
9
"io/fs"
10
+
"log/slog"
10
11
"sync"
11
12
12
13
"github.com/bluesky-social/indigo/api/atproto"
···
27
28
Limiters: make(map[uint]*rate.Limiter),
28
29
ApplyPDSClientSettings: func(*xrpc.Client) {},
29
30
MaxConcurrency: maxConcurrency,
31
+
log: slog.Default().With("system", "indexer"),
30
32
}
31
33
}
32
34
···
40
42
MaxConcurrency int
41
43
42
44
ApplyPDSClientSettings func(*xrpc.Client)
45
+
46
+
log *slog.Logger
43
47
}
44
48
45
49
func (rf *RepoFetcher) GetLimiter(pdsID uint) *rate.Limiter {
···
50
54
}
51
55
52
56
func (rf *RepoFetcher) GetOrCreateLimiter(pdsID uint, pdsrate float64) *rate.Limiter {
53
-
rf.LimitMux.RLock()
54
-
defer rf.LimitMux.RUnlock()
57
+
rf.LimitMux.Lock()
58
+
defer rf.LimitMux.Unlock()
55
59
56
60
lim, ok := rf.Limiters[pdsID]
57
61
if !ok {
···
84
88
// Wait to prevent DOSing the PDS when connecting to a new stream with lots of active repos
85
89
limiter.Wait(ctx)
86
90
87
-
log.Debugw("SyncGetRepo", "did", did, "since", rev)
91
+
rf.log.Debug("SyncGetRepo", "did", did, "since", rev)
88
92
// TODO: max size on these? A malicious PDS could just send us a petabyte sized repo here and kill us
89
93
repo, err := atproto.SyncGetRepo(ctx, c, did, rev)
90
94
if err != nil {
···
107
111
108
112
var pds models.PDS
109
113
if err := rf.db.First(&pds, "id = ?", ai.PDS).Error; err != nil {
114
+
catchupEventsFailed.WithLabelValues("nopds").Inc()
110
115
return fmt.Errorf("expected to find pds record (%d) in db for crawling one of their users: %w", ai.PDS, err)
111
116
}
112
117
113
118
rev, err := rf.repoman.GetRepoRev(ctx, ai.Uid)
114
119
if err != nil && !isNotFound(err) {
120
+
catchupEventsFailed.WithLabelValues("noroot").Inc()
115
121
return fmt.Errorf("failed to get repo root: %w", err)
116
122
}
117
123
···
123
129
for i, j := range job.catchup {
124
130
catchupEventsProcessed.Inc()
125
131
if err := rf.repoman.HandleExternalUserEvent(ctx, pds.ID, ai.Uid, ai.Did, j.evt.Since, j.evt.Rev, j.evt.Blocks, j.evt.Ops); err != nil {
126
-
log.Errorw("buffered event catchup failed", "error", err, "did", ai.Did, "i", i, "jobCount", len(job.catchup), "seq", j.evt.Seq)
132
+
rf.log.Error("buffered event catchup failed", "error", err, "did", ai.Did, "i", i, "jobCount", len(job.catchup), "seq", j.evt.Seq)
127
133
resync = true // fall back to a repo sync
128
134
break
129
135
}
···
151
157
span.RecordError(err)
152
158
153
159
if ipld.IsNotFound(err) || errors.Is(err, io.EOF) || errors.Is(err, fs.ErrNotExist) {
154
-
log.Errorw("partial repo fetch was missing data", "did", ai.Did, "pds", pds.Host, "rev", rev)
160
+
rf.log.Error("partial repo fetch was missing data", "did", ai.Did, "pds", pds.Host, "rev", rev)
155
161
repo, err := rf.fetchRepo(ctx, c, &pds, ai.Did, "")
156
162
if err != nil {
157
163
return err
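`GetOrCreateLimiter` above switches from `RLock` to `Lock`: the miss path inserts a new limiter into the `Limiters` map, and a map write performed under only a read lock can race with other writers and panic. A small sketch of the get-or-create-under-write-lock pattern, with illustrative names:

```go
package main

import (
	"sync"

	"golang.org/x/time/rate"
)

type fetcher struct {
	mu       sync.RWMutex
	limiters map[uint]*rate.Limiter
}

// getOrCreate must hold the write lock: the miss path mutates the map,
// and mutating a map under only an RLock is not safe against concurrent callers.
func (f *fetcher) getOrCreate(id uint, perSec float64) *rate.Limiter {
	f.mu.Lock()
	defer f.mu.Unlock()

	lim, ok := f.limiters[id]
	if !ok {
		lim = rate.NewLimiter(rate.Limit(perSec), 1)
		f.limiters[id] = lim
	}
	return lim
}

func main() {
	f := &fetcher{limiters: make(map[uint]*rate.Limiter)}
	_ = f.getOrCreate(1, 10)
}
```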
+1
-1
lex/type_schema.go
···
223
223
}
224
224
returndef = fmt.Sprintf("(*%s.%s, error)", impname, outname)
225
225
case "application/cbor", "application/vnd.ipld.car", "*/*":
226
-
returndef = fmt.Sprintf("(io.Reader, error)")
226
+
returndef = "(io.Reader, error)"
227
227
default:
228
228
return fmt.Errorf("unrecognized output encoding (handler stub): %q", s.Output.Encoding)
229
229
}
+11
-11
mst/cbor_gen.go
···
18
18
var _ = math.E
19
19
var _ = sort.Sort
20
20
21
-
func (t *nodeData) MarshalCBOR(w io.Writer) error {
21
+
func (t *NodeData) MarshalCBOR(w io.Writer) error {
22
22
if t == nil {
23
23
_, err := w.Write(cbg.CborNull)
24
24
return err
···
30
30
return err
31
31
}
32
32
33
-
// t.Entries ([]mst.treeEntry) (slice)
33
+
// t.Entries ([]mst.TreeEntry) (slice)
34
34
if len("e") > 1000000 {
35
35
return xerrors.Errorf("Value in field \"e\" was too long")
36
36
}
···
81
81
return nil
82
82
}
83
83
84
-
func (t *nodeData) UnmarshalCBOR(r io.Reader) (err error) {
85
-
*t = nodeData{}
84
+
func (t *NodeData) UnmarshalCBOR(r io.Reader) (err error) {
85
+
*t = NodeData{}
86
86
87
87
cr := cbg.NewCborReader(r)
88
88
···
101
101
}
102
102
103
103
if extra > cbg.MaxLength {
104
-
return fmt.Errorf("nodeData: map struct too large (%d)", extra)
104
+
return fmt.Errorf("NodeData: map struct too large (%d)", extra)
105
105
}
106
106
107
107
n := extra
···
122
122
}
123
123
124
124
switch string(nameBuf[:nameLen]) {
125
-
// t.Entries ([]mst.treeEntry) (slice)
125
+
// t.Entries ([]mst.TreeEntry) (slice)
126
126
case "e":
127
127
128
128
maj, extra, err = cr.ReadHeader()
···
139
139
}
140
140
141
141
if extra > 0 {
142
-
t.Entries = make([]treeEntry, extra)
142
+
t.Entries = make([]TreeEntry, extra)
143
143
}
144
144
145
145
for i := 0; i < int(extra); i++ {
···
195
195
196
196
return nil
197
197
}
198
-
func (t *treeEntry) MarshalCBOR(w io.Writer) error {
198
+
func (t *TreeEntry) MarshalCBOR(w io.Writer) error {
199
199
if t == nil {
200
200
_, err := w.Write(cbg.CborNull)
201
201
return err
···
294
294
return nil
295
295
}
296
296
297
-
func (t *treeEntry) UnmarshalCBOR(r io.Reader) (err error) {
298
-
*t = treeEntry{}
297
+
func (t *TreeEntry) UnmarshalCBOR(r io.Reader) (err error) {
298
+
*t = TreeEntry{}
299
299
300
300
cr := cbg.NewCborReader(r)
301
301
···
314
314
}
315
315
316
316
if extra > cbg.MaxLength {
317
-
return fmt.Errorf("treeEntry: map struct too large (%d)", extra)
317
+
return fmt.Errorf("TreeEntry: map struct too large (%d)", extra)
318
318
}
319
319
320
320
n := extra
+8
-8
mst/mst.go
···
105
105
// the CBOR codec.
106
106
func CBORTypes() []reflect.Type {
107
107
return []reflect.Type{
108
-
reflect.TypeOf(nodeData{}),
109
-
reflect.TypeOf(treeEntry{}),
108
+
reflect.TypeOf(NodeData{}),
109
+
reflect.TypeOf(TreeEntry{}),
110
110
}
111
111
}
112
112
113
113
// MST tree node as gets serialized to CBOR. Note that the CBOR fields are all
114
114
// single-character.
115
-
type nodeData struct {
115
+
type NodeData struct {
116
116
Left *cid.Cid `cborgen:"l"` // [nullable] pointer to lower-level subtree to the "left" of this path/key
117
-
Entries []treeEntry `cborgen:"e"` // ordered list of entries at this node
117
+
Entries []TreeEntry `cborgen:"e"` // ordered list of entries at this node
118
118
}
119
119
120
-
// treeEntry are elements of nodeData's Entries.
121
-
type treeEntry struct {
120
+
// TreeEntry are elements of NodeData's Entries.
121
+
type TreeEntry struct {
122
122
PrefixLen int64 `cborgen:"p"` // count of characters shared with previous path/key in tree
123
123
KeySuffix []byte `cborgen:"k"` // remaining part of path/key (appended to "previous key")
124
124
Val cid.Cid `cborgen:"v"` // CID pointer at this path/key
···
189
189
// otherwise this is a virtual/pointer struct and we need to hydrate from
190
190
// blockstore before returning entries
191
191
if mst.pointer != cid.Undef {
192
-
var nd nodeData
192
+
var nd NodeData
193
193
if err := mst.cst.Get(ctx, mst.pointer, &nd); err != nil {
194
194
return nil, err
195
195
}
···
210
210
}
211
211
212
212
// golang-specific helper that calls in to deserializeNodeData
213
-
func entriesFromNodeData(ctx context.Context, nd *nodeData, cst cbor.IpldStore) ([]nodeEntry, error) {
213
+
func entriesFromNodeData(ctx context.Context, nd *NodeData, cst cbor.IpldStore) ([]nodeEntry, error) {
214
214
layer := -1
215
215
if len(nd.Entries) > 0 {
216
216
// NOTE(bnewbold): can compute the layer on the first KeySuffix, because for the first entry that field is a complete key
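The newly exported `TreeEntry` stores keys with prefix compression: `PrefixLen` counts the characters shared with the previous key in the node and `KeySuffix` carries the remainder, so full keys are reconstructed left to right. A small sketch of that reconstruction, assuming only the two fields shown here (the example keys are made up):

```go
package main

import "fmt"

// entry mirrors the PrefixLen/KeySuffix halves of mst.TreeEntry.
type entry struct {
	prefixLen int
	keySuffix []byte
}

// expandKeys rebuilds full keys: each key reuses prefixLen bytes of the
// previously reconstructed key and appends its own suffix.
func expandKeys(entries []entry) []string {
	var out []string
	last := ""
	for _, e := range entries {
		key := last[:e.prefixLen] + string(e.keySuffix)
		out = append(out, key)
		last = key
	}
	return out
}

func main() {
	entries := []entry{
		{0, []byte("app.bsky.feed.like/aaa")},
		{19, []byte("bbb")}, // shares "app.bsky.feed.like/" with the previous key
	}
	fmt.Println(expandKeys(entries)) // [app.bsky.feed.like/aaa app.bsky.feed.like/bbb]
}
```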
+2
-2
mst/mst_interop_test.go
+4
-4
mst/mst_util.go
···
66
66
}
67
67
68
68
// Typescript: deserializeNodeData(storage, data, layer)
69
-
func deserializeNodeData(ctx context.Context, cst cbor.IpldStore, nd *nodeData, layer int) ([]nodeEntry, error) {
69
+
func deserializeNodeData(ctx context.Context, cst cbor.IpldStore, nd *NodeData, layer int) ([]nodeEntry, error) {
70
70
entries := []nodeEntry{}
71
71
if nd.Left != nil {
72
72
// Note: like Typescript, this is actually a lazy load
···
111
111
}
112
112
113
113
// Typescript: serializeNodeData(entries) -> NodeData
114
-
func serializeNodeData(entries []nodeEntry) (*nodeData, error) {
115
-
var data nodeData
114
+
func serializeNodeData(entries []nodeEntry) (*NodeData, error) {
115
+
var data NodeData
116
116
117
117
i := 0
118
118
if len(entries) > 0 && entries[0].isTree() {
···
157
157
}
158
158
159
159
prefixLen := countPrefixLen(lastKey, leaf.Key)
160
-
data.Entries = append(data.Entries, treeEntry{
160
+
data.Entries = append(data.Entries, TreeEntry{
161
161
PrefixLen: int64(prefixLen),
162
162
KeySuffix: []byte(leaf.Key)[prefixLen:],
163
163
Val: leaf.Val,
+27
pds/data/types.go
···
1
+
package data
2
+
3
+
import (
4
+
"github.com/bluesky-social/indigo/models"
5
+
"gorm.io/gorm"
6
+
"time"
7
+
)
8
+
9
+
type User struct {
10
+
ID models.Uid `gorm:"primarykey"`
11
+
CreatedAt time.Time
12
+
UpdatedAt time.Time
13
+
DeletedAt gorm.DeletedAt `gorm:"index"`
14
+
Handle string `gorm:"uniqueIndex"`
15
+
Password string
16
+
RecoveryKey string
17
+
Email string
18
+
Did string `gorm:"uniqueIndex"`
19
+
PDS uint
20
+
}
21
+
22
+
type Peering struct {
23
+
gorm.Model
24
+
Host string
25
+
Did string
26
+
Approved bool
27
+
}
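The extracted `User` struct spells out the bookkeeping columns (primary key, timestamps, soft-delete marker, unique indexes) that `Peering` gets by embedding `gorm.Model`. A short sketch of how those GORM tags map onto a model definition (the type names here are illustrative):

```go
package main

import (
	"time"

	"gorm.io/gorm"
)

// Explicit timestamp/soft-delete columns, like data.User above.
type Account struct {
	ID        uint `gorm:"primarykey"`
	CreatedAt time.Time
	UpdatedAt time.Time
	DeletedAt gorm.DeletedAt `gorm:"index"` // soft delete: rows are filtered out of queries, not removed
	Handle    string         `gorm:"uniqueIndex"`
}

// gorm.Model embeds the same four bookkeeping fields (ID, CreatedAt,
// UpdatedAt, DeletedAt), which is why data.Peering only declares its own columns.
type Peer struct {
	gorm.Model
	Host     string
	Approved bool
}

func main() {
	_ = Account{}
	_ = Peer{}
}
```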
+6
-2
pds/feedgen.go
···
3
3
import (
4
4
"context"
5
5
"fmt"
6
+
"log/slog"
6
7
"sort"
7
8
"strings"
8
9
"time"
···
22
23
ix *indexer.Indexer
23
24
24
25
readRecord ReadRecordFunc
26
+
27
+
log *slog.Logger
25
28
}
26
29
27
-
func NewFeedGenerator(db *gorm.DB, ix *indexer.Indexer, readRecord ReadRecordFunc) (*FeedGenerator, error) {
30
+
func NewFeedGenerator(db *gorm.DB, ix *indexer.Indexer, readRecord ReadRecordFunc, log *slog.Logger) (*FeedGenerator, error) {
28
31
return &FeedGenerator{
29
32
db: db,
30
33
ix: ix,
31
34
readRecord: readRecord,
35
+
log: log,
32
36
}, nil
33
37
}
34
38
···
355
359
356
360
func (fg *FeedGenerator) GetVotes(ctx context.Context, uri string, pcid cid.Cid, limit int, before string) ([]*HydratedVote, error) {
357
361
if before != "" {
358
-
log.Warn("not respecting 'before' yet")
362
+
fg.log.Warn("not respecting 'before' yet")
359
363
}
360
364
361
365
p, err := fg.ix.GetPost(ctx, uri)
+1
-1
pds/handlers_test.go
+13
-36
pds/server.go
···
5
5
"database/sql"
6
6
"errors"
7
7
"fmt"
8
+
"log/slog"
8
9
"net"
9
10
"net/http"
10
11
"net/mail"
···
14
15
15
16
"github.com/bluesky-social/indigo/api/atproto"
16
17
comatproto "github.com/bluesky-social/indigo/api/atproto"
17
-
bsky "github.com/bluesky-social/indigo/api/bsky"
18
18
"github.com/bluesky-social/indigo/carstore"
19
19
"github.com/bluesky-social/indigo/events"
20
20
"github.com/bluesky-social/indigo/indexer"
21
21
lexutil "github.com/bluesky-social/indigo/lex/util"
22
22
"github.com/bluesky-social/indigo/models"
23
23
"github.com/bluesky-social/indigo/notifs"
24
+
pdsdata "github.com/bluesky-social/indigo/pds/data"
24
25
"github.com/bluesky-social/indigo/plc"
25
26
"github.com/bluesky-social/indigo/repomgr"
26
27
"github.com/bluesky-social/indigo/util"
···
29
30
gojwt "github.com/golang-jwt/jwt"
30
31
"github.com/gorilla/websocket"
31
32
"github.com/ipfs/go-cid"
32
-
logging "github.com/ipfs/go-log"
33
33
"github.com/labstack/echo/v4"
34
34
"github.com/labstack/echo/v4/middleware"
35
35
"github.com/lestrrat-go/jwx/v2/jwt"
···
37
37
"gorm.io/gorm"
38
38
)
39
39
40
-
var log = logging.Logger("pds")
41
-
42
40
type Server struct {
43
41
db *gorm.DB
44
42
cs carstore.CarStore
···
56
54
serviceUrl string
57
55
58
56
plc plc.PLCClient
57
+
58
+
log *slog.Logger
59
59
}
60
60
61
61
// serverListenerBootTimeout is how long to wait for the requested server socket
···
96
96
serviceUrl: serviceUrl,
97
97
jwtSigningKey: jwtkey,
98
98
enforcePeering: false,
99
+
100
+
log: slog.Default().With("system", "pds"),
99
101
}
100
102
101
103
repoman.SetEventHandler(func(ctx context.Context, evt *repomgr.RepoEvent) {
102
104
if err := ix.HandleRepoEvent(ctx, evt); err != nil {
103
-
log.Errorw("handle repo event failed", "user", evt.User, "err", err)
105
+
s.log.Error("handle repo event failed", "user", evt.User, "err", err)
104
106
}
105
107
}, true)
106
108
107
109
//ix.SendRemoteFollow = s.sendRemoteFollow
108
110
ix.CreateExternalUser = s.createExternalUser
109
111
110
-
feedgen, err := NewFeedGenerator(db, ix, s.readRecordFunc)
112
+
feedgen, err := NewFeedGenerator(db, ix, s.readRecordFunc, s.log)
111
113
if err != nil {
112
114
return nil, err
113
115
}
···
199
201
handle = hurl.Host
200
202
}
201
203
202
-
profile, err := bsky.ActorGetProfile(ctx, c, did)
203
-
if err != nil {
204
-
return nil, err
205
-
}
206
-
207
-
if handle != profile.Handle {
208
-
return nil, fmt.Errorf("mismatch in handle between did document and pds profile (%s != %s)", handle, profile.Handle)
209
-
}
210
-
211
204
// TODO: request this users info from their server to fill out our data...
212
205
u := User{
213
206
Handle: handle,
···
224
217
subj := &models.ActorInfo{
225
218
Uid: u.ID,
226
219
Handle: sql.NullString{String: handle, Valid: true},
227
-
DisplayName: *profile.DisplayName,
220
+
DisplayName: "missing display name",
228
221
Did: did,
229
222
Type: "",
230
223
PDS: peering.ID,
···
433
426
434
427
func (s *Server) HandleHealthCheck(c echo.Context) error {
435
428
if err := s.db.Exec("SELECT 1").Error; err != nil {
436
-
log.Errorf("healthcheck can't connect to database: %v", err)
429
+
s.log.Error("healthcheck can't connect to database", "err", err)
437
430
return c.JSON(500, HealthStatus{Status: "error", Message: "can't connect to database"})
438
431
} else {
439
432
return c.JSON(200, HealthStatus{Status: "ok"})
···
456
449
return c.String(200, u.Did)
457
450
}
458
451
459
-
type User struct {
460
-
ID models.Uid `gorm:"primarykey"`
461
-
CreatedAt time.Time
462
-
UpdatedAt time.Time
463
-
DeletedAt gorm.DeletedAt `gorm:"index"`
464
-
Handle string `gorm:"uniqueIndex"`
465
-
Password string
466
-
RecoveryKey string
467
-
Email string
468
-
Did string `gorm:"uniqueIndex"`
469
-
PDS uint
470
-
}
452
+
type User = pdsdata.User
471
453
472
454
type RefreshToken struct {
473
455
gorm.Model
···
636
618
panic("nyi")
637
619
}
638
620
639
-
type Peering struct {
640
-
gorm.Model
641
-
Host string
642
-
Did string
643
-
Approved bool
644
-
}
621
+
type Peering = pdsdata.Peering
645
622
646
623
func (s *Server) EventsHandler(c echo.Context) error {
647
624
conn, err := websocket.Upgrade(c.Response().Writer, c.Request(), c.Response().Header(), 1<<10, 1<<10)
···
741
718
func (s *Server) UpdateUserHandle(ctx context.Context, u *User, handle string) error {
742
719
if u.Handle == handle {
743
720
// no change? move on
744
-
log.Warnw("attempted to change handle to current handle", "did", u.Did, "handle", handle)
721
+
s.log.Warn("attempted to change handle to current handle", "did", u.Did, "handle", handle)
745
722
return nil
746
723
}
747
724
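The `User` and `Peering` structs move into the new `pds/data` package, and the hunk above keeps the old `pds` names compiling via type aliases (`type User = pdsdata.User`). A single-file sketch of why an alias, rather than a new type definition, makes such a move non-breaking (package and type names here are illustrative):

```go
package main

import "fmt"

// Original definition, standing in for the type that moved to a "data" package.
type dataUser struct {
	ID     uint
	Handle string
}

// A type alias (note the '='): User and dataUser are the *same* type, so
// existing code that constructs or passes the old name keeps compiling
// after the definition moves.
type User = dataUser

// A plain definition (no '=') would instead create a distinct type that
// needs explicit conversion at every call site.

func greet(u User) string { return "hello, " + u.Handle }

func main() {
	u := dataUser{ID: 1, Handle: "alice.example"}
	fmt.Println(greet(u)) // works: User is just another name for dataUser
}
```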
+90
plc/memcached.go
···
1
+
package plc
2
+
3
+
import (
4
+
"context"
5
+
"encoding/json"
6
+
"github.com/bradfitz/gomemcache/memcache"
7
+
"go.opentelemetry.io/otel/attribute"
8
+
"time"
9
+
10
+
"github.com/bluesky-social/indigo/did"
11
+
"go.opentelemetry.io/otel"
12
+
)
13
+
14
+
type MemcachedDidResolver struct {
15
+
mcd *memcache.Client
16
+
res did.Resolver
17
+
maxAge int32
18
+
}
19
+
20
+
func NewMemcachedDidResolver(res did.Resolver, maxAge time.Duration, servers []string) *MemcachedDidResolver {
21
+
expiry := int32(0)
22
+
if maxAge.Seconds() > (30 * 24 * 60 * 60) {
23
+
// clamp expiry at 30 days minus a minute for memcached
24
+
expiry = (30 * 24 * 60 * 60) - 60
25
+
} else {
26
+
expiry = int32(maxAge.Seconds())
27
+
}
28
+
client := memcache.New(servers...)
29
+
return &MemcachedDidResolver{
30
+
mcd: client,
31
+
res: res,
32
+
maxAge: expiry,
33
+
}
34
+
}
35
+
36
+
func (r *MemcachedDidResolver) FlushCacheFor(didstr string) {
37
+
r.mcd.Delete(didstr)
38
+
r.res.FlushCacheFor(didstr)
39
+
}
40
+
41
+
func (r *MemcachedDidResolver) tryCache(didstr string) (*did.Document, bool) {
42
+
ob, err := r.mcd.Get(didstr)
43
+
if (ob == nil) || (err != nil) {
44
+
return nil, false
45
+
}
46
+
var doc did.Document
47
+
err = json.Unmarshal(ob.Value, &doc)
48
+
if err != nil {
49
+
// TODO: log error?
50
+
return nil, false
51
+
}
52
+
53
+
return &doc, true
54
+
}
55
+
56
+
func (r *MemcachedDidResolver) putCache(did string, doc *did.Document) {
57
+
blob, err := json.Marshal(doc)
58
+
if err != nil {
59
+
// TODO: log error
60
+
return
61
+
}
62
+
item := memcache.Item{
63
+
Key: did,
64
+
Value: blob,
65
+
Expiration: int32(r.maxAge),
66
+
}
67
+
r.mcd.Set(&item)
68
+
}
69
+
70
+
func (r *MemcachedDidResolver) GetDocument(ctx context.Context, didstr string) (*did.Document, error) {
71
+
ctx, span := otel.Tracer("cacheResolver").Start(ctx, "getDocument")
72
+
defer span.End()
73
+
74
+
doc, ok := r.tryCache(didstr)
75
+
if ok {
76
+
span.SetAttributes(attribute.Bool("cache", true))
77
+
memcacheHitsTotal.Inc()
78
+
return doc, nil
79
+
}
80
+
memcacheMissesTotal.Inc()
81
+
span.SetAttributes(attribute.Bool("cache", false))
82
+
83
+
doc, err := r.res.GetDocument(ctx, didstr)
84
+
if err != nil {
85
+
return nil, err
86
+
}
87
+
88
+
r.putCache(didstr, doc)
89
+
return doc, nil
90
+
}
+10
plc/metrics.go
+10
plc/metrics.go
···
14
14
Name: "plc_cache_misses_total",
15
15
Help: "Total number of cache misses",
16
16
})
17
+
18
+
var memcacheHitsTotal = promauto.NewCounter(prometheus.CounterOpts{
19
+
Name: "plc_memcache_hits_total",
20
+
Help: "Total number of cache hits",
21
+
})
22
+
23
+
var memcacheMissesTotal = promauto.NewCounter(prometheus.CounterOpts{
24
+
Name: "plc_memcache_misses_total",
25
+
Help: "Total number of cache misses",
26
+
})
+9
-1
repomgr/bench_test.go
···
54
54
b.Fatal(err)
55
55
}
56
56
57
-
cs, err := carstore.NewCarStore(cardb, cspath)
57
+
// TODO: constructor for 'either type'
58
+
/*
59
+
cs, err := carstore.NewCarStore(cardb, []string{cspath})
60
+
if err != nil {
61
+
b.Fatal(err)
62
+
}
63
+
*/
64
+
cs, err := carstore.NewNonArchivalCarstore(cardb)
58
65
if err != nil {
59
66
b.Fatal(err)
60
67
}
61
68
62
69
repoman := NewRepoManager(cs, &util.FakeKeyManager{})
70
+
repoman.noArchive = true
63
71
64
72
ctx := context.TODO()
65
73
if err := repoman.InitNewActor(ctx, 1, "hello.world", "did:foo:bar", "catdog", "", ""); err != nil {
+20
-12
repomgr/ingest_test.go
+20
-12
repomgr/ingest_test.go
···
50
50
t.Fatal(err)
51
51
}
52
52
53
-
cs, err := carstore.NewCarStore(cardb, cspath)
53
+
cs, err := carstore.NewCarStore(cardb, []string{cspath})
54
54
if err != nil {
55
55
t.Fatal(err)
56
56
}
···
69
69
}
70
70
}
71
71
72
-
func testCarstore(t *testing.T, dir string) carstore.CarStore {
72
+
func testCarstore(t *testing.T, dir string, archive bool) carstore.CarStore {
73
73
cardb, err := gorm.Open(sqlite.Open(filepath.Join(dir, "car.sqlite")))
74
74
if err != nil {
75
75
t.Fatal(err)
···
80
80
t.Fatal(err)
81
81
}
82
82
83
-
cs, err := carstore.NewCarStore(cardb, cspath)
84
-
if err != nil {
85
-
t.Fatal(err)
86
-
}
83
+
if archive {
84
+
cs, err := carstore.NewCarStore(cardb, []string{cspath})
85
+
if err != nil {
86
+
t.Fatal(err)
87
+
}
88
+
return cs
89
+
} else {
90
+
cs, err := carstore.NewNonArchivalCarstore(cardb)
91
+
if err != nil {
92
+
t.Fatal(err)
93
+
}
87
94
88
-
return cs
95
+
return cs
96
+
}
89
97
}
90
98
91
99
func TestIngestWithGap(t *testing.T) {
···
106
114
Uid: 1,
107
115
})
108
116
109
-
cs := testCarstore(t, dir)
117
+
cs := testCarstore(t, dir, true)
110
118
111
119
repoman := NewRepoManager(cs, &util.FakeKeyManager{})
112
120
···
114
122
if err != nil {
115
123
t.Fatal(err)
116
124
}
117
-
cs2 := testCarstore(t, dir2)
125
+
cs2 := testCarstore(t, dir2, true)
118
126
119
127
var since *string
120
128
ctx := context.TODO()
···
198
206
Uid: 1,
199
207
})
200
208
201
-
cs := testCarstore(t, dir)
209
+
cs := testCarstore(t, dir, true)
202
210
203
211
repoman := NewRepoManager(cs, &util.FakeKeyManager{})
204
212
···
208
216
}
209
217
210
218
p1, _, err := repoman.CreateRecord(ctx, 1, "app.bsky.feed.post", &bsky.FeedPost{
211
-
Text: fmt.Sprintf("hello friend"),
219
+
Text: "hello friend",
212
220
})
213
221
if err != nil {
214
222
t.Fatal(err)
215
223
}
216
224
217
225
p2, _, err := repoman.CreateRecord(ctx, 1, "app.bsky.feed.post", &bsky.FeedPost{
218
-
Text: fmt.Sprintf("hello friend"),
226
+
Text: "hello friend",
219
227
})
220
228
if err != nil {
221
229
t.Fatal(err)
+18
repomgr/metrics.go
···
9
9
Name: "repomgr_repo_ops_imported",
10
10
Help: "Number of repo ops imported",
11
11
})
12
+
13
+
var openAndSigCheckDuration = promauto.NewHistogram(prometheus.HistogramOpts{
14
+
Name: "repomgr_open_and_sig_check_duration",
15
+
Help: "Duration of opening and signature check",
16
+
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
17
+
})
18
+
19
+
var calcDiffDuration = promauto.NewHistogram(prometheus.HistogramOpts{
20
+
Name: "repomgr_calc_diff_duration",
21
+
Help: "Duration of calculating diff",
22
+
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
23
+
})
24
+
25
+
var writeCarSliceDuration = promauto.NewHistogram(prometheus.HistogramOpts{
26
+
Name: "repomgr_write_car_slice_duration",
27
+
Help: "Duration of writing car slice",
28
+
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
29
+
})
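These histograms are fed by the `start := time.Now()` / `Observe(time.Since(start).Seconds())` calls added in the repomgr.go hunks that follow. A minimal sketch of timing one phase into a prometheus histogram with the same bucket layout (the metric name is illustrative):

```go
package main

import (
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

// Buckets start at 1ms and double, matching the ExponentialBuckets(0.001, 2, 15)
// style used above (15 buckets tops out around 16s).
var phaseDuration = promauto.NewHistogram(prometheus.HistogramOpts{
	Name:    "example_phase_duration_seconds",
	Help:    "Duration of one processing phase",
	Buckets: prometheus.ExponentialBuckets(0.001, 2, 15),
})

func doPhase() { time.Sleep(5 * time.Millisecond) }

func main() {
	start := time.Now()
	doPhase()
	// Observe in seconds, the prometheus convention for duration metrics.
	phaseDuration.Observe(time.Since(start).Seconds())
}
```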
+133
-14
repomgr/repomgr.go
···
6
6
"errors"
7
7
"fmt"
8
8
"io"
9
+
"log/slog"
9
10
"strings"
10
11
"sync"
12
+
"time"
11
13
12
14
atproto "github.com/bluesky-social/indigo/api/atproto"
13
15
bsky "github.com/bluesky-social/indigo/api/bsky"
···
23
25
"github.com/ipfs/go-datastore"
24
26
blockstore "github.com/ipfs/go-ipfs-blockstore"
25
27
ipld "github.com/ipfs/go-ipld-format"
26
-
logging "github.com/ipfs/go-log/v2"
27
28
"github.com/ipld/go-car"
28
29
cbg "github.com/whyrusleeping/cbor-gen"
29
30
"go.opentelemetry.io/otel"
···
31
32
"gorm.io/gorm"
32
33
)
33
34
34
-
var log = logging.Logger("repomgr")
35
+
func NewRepoManager(cs carstore.CarStore, kmgr KeyManager) *RepoManager {
35
36
36
-
func NewRepoManager(cs carstore.CarStore, kmgr KeyManager) *RepoManager {
37
+
var noArchive bool
38
+
if _, ok := cs.(*carstore.NonArchivalCarstore); ok {
39
+
noArchive = true
40
+
}
37
41
38
42
return &RepoManager{
39
43
cs: cs,
40
44
userLocks: make(map[models.Uid]*userLock),
41
45
kmgr: kmgr,
46
+
log: slog.Default().With("system", "repomgr"),
47
+
noArchive: noArchive,
42
48
}
43
49
}
44
50
···
61
67
62
68
events func(context.Context, *RepoEvent)
63
69
hydrateRecords bool
70
+
71
+
log *slog.Logger
72
+
noArchive bool
64
73
}
65
74
66
75
type ActorInfo struct {
···
467
476
return cid.Undef, nil, err
468
477
}
469
478
470
-
_, _, err = r.GetRecord(ctx, collection+"/"+rkey)
479
+
_, _, err = r.GetRecordBytes(ctx, collection+"/"+rkey)
471
480
if err != nil {
472
481
return cid.Undef, nil, err
473
482
}
···
528
537
}
529
538
530
539
func (rm *RepoManager) HandleExternalUserEvent(ctx context.Context, pdsid uint, uid models.Uid, did string, since *string, nrev string, carslice []byte, ops []*atproto.SyncSubscribeRepos_RepoOp) error {
540
+
if rm.noArchive {
541
+
return rm.handleExternalUserEventNoArchive(ctx, pdsid, uid, did, since, nrev, carslice, ops)
542
+
} else {
543
+
return rm.handleExternalUserEventArchive(ctx, pdsid, uid, did, since, nrev, carslice, ops)
544
+
}
545
+
}
546
+
547
+
func (rm *RepoManager) handleExternalUserEventNoArchive(ctx context.Context, pdsid uint, uid models.Uid, did string, since *string, nrev string, carslice []byte, ops []*atproto.SyncSubscribeRepos_RepoOp) error {
531
548
ctx, span := otel.Tracer("repoman").Start(ctx, "HandleExternalUserEvent")
532
549
defer span.End()
533
550
534
551
span.SetAttributes(attribute.Int64("uid", int64(uid)))
535
552
536
-
log.Debugw("HandleExternalUserEvent", "pds", pdsid, "uid", uid, "since", since, "nrev", nrev)
553
+
rm.log.Debug("HandleExternalUserEvent", "pds", pdsid, "uid", uid, "since", since, "nrev", nrev)
554
+
555
+
unlock := rm.lockUser(ctx, uid)
556
+
defer unlock()
557
+
558
+
start := time.Now()
559
+
root, ds, err := rm.cs.ImportSlice(ctx, uid, since, carslice)
560
+
if err != nil {
561
+
return fmt.Errorf("importing external carslice: %w", err)
562
+
}
563
+
564
+
r, err := repo.OpenRepo(ctx, ds, root)
565
+
if err != nil {
566
+
return fmt.Errorf("opening external user repo (%d, root=%s): %w", uid, root, err)
567
+
}
568
+
569
+
if err := rm.CheckRepoSig(ctx, r, did); err != nil {
570
+
return fmt.Errorf("check repo sig: %w", err)
571
+
}
572
+
openAndSigCheckDuration.Observe(time.Since(start).Seconds())
573
+
574
+
evtops := make([]RepoOp, 0, len(ops))
575
+
for _, op := range ops {
576
+
parts := strings.SplitN(op.Path, "/", 2)
577
+
if len(parts) != 2 {
578
+
return fmt.Errorf("invalid rpath in mst diff, must have collection and rkey")
579
+
}
580
+
581
+
switch EventKind(op.Action) {
582
+
case EvtKindCreateRecord:
583
+
rop := RepoOp{
584
+
Kind: EvtKindCreateRecord,
585
+
Collection: parts[0],
586
+
Rkey: parts[1],
587
+
RecCid: (*cid.Cid)(op.Cid),
588
+
}
589
+
590
+
if rm.hydrateRecords {
591
+
_, rec, err := r.GetRecord(ctx, op.Path)
592
+
if err != nil {
593
+
return fmt.Errorf("reading changed record from car slice: %w", err)
594
+
}
595
+
rop.Record = rec
596
+
}
597
+
598
+
evtops = append(evtops, rop)
599
+
case EvtKindUpdateRecord:
600
+
rop := RepoOp{
601
+
Kind: EvtKindUpdateRecord,
602
+
Collection: parts[0],
603
+
Rkey: parts[1],
604
+
RecCid: (*cid.Cid)(op.Cid),
605
+
}
606
+
607
+
if rm.hydrateRecords {
608
+
_, rec, err := r.GetRecord(ctx, op.Path)
609
+
if err != nil {
610
+
return fmt.Errorf("reading changed record from car slice: %w", err)
611
+
}
612
+
613
+
rop.Record = rec
614
+
}
615
+
616
+
evtops = append(evtops, rop)
617
+
case EvtKindDeleteRecord:
618
+
evtops = append(evtops, RepoOp{
619
+
Kind: EvtKindDeleteRecord,
620
+
Collection: parts[0],
621
+
Rkey: parts[1],
622
+
})
623
+
default:
624
+
return fmt.Errorf("unrecognized external user event kind: %q", op.Action)
625
+
}
626
+
}
627
+
628
+
if rm.events != nil {
629
+
rm.events(ctx, &RepoEvent{
630
+
User: uid,
631
+
//OldRoot: prev,
632
+
NewRoot: root,
633
+
Rev: nrev,
634
+
Since: since,
635
+
Ops: evtops,
636
+
RepoSlice: carslice,
637
+
PDS: pdsid,
638
+
})
639
+
}
640
+
641
+
return nil
642
+
}
643
+
644
+
func (rm *RepoManager) handleExternalUserEventArchive(ctx context.Context, pdsid uint, uid models.Uid, did string, since *string, nrev string, carslice []byte, ops []*atproto.SyncSubscribeRepos_RepoOp) error {
645
+
ctx, span := otel.Tracer("repoman").Start(ctx, "HandleExternalUserEvent")
646
+
defer span.End()
647
+
648
+
span.SetAttributes(attribute.Int64("uid", int64(uid)))
649
+
650
+
rm.log.Debug("HandleExternalUserEvent", "pds", pdsid, "uid", uid, "since", since, "nrev", nrev)
537
651
538
652
unlock := rm.lockUser(ctx, uid)
539
653
defer unlock()
540
654
655
+
start := time.Now()
541
656
root, ds, err := rm.cs.ImportSlice(ctx, uid, since, carslice)
542
657
if err != nil {
543
658
return fmt.Errorf("importing external carslice: %w", err)
···
551
666
if err := rm.CheckRepoSig(ctx, r, did); err != nil {
552
667
return err
553
668
}
669
+
openAndSigCheckDuration.Observe(time.Since(start).Seconds())
554
670
555
671
var skipcids map[cid.Cid]bool
556
672
if ds.BaseCid().Defined() {
···
571
687
}
572
688
}
573
689
690
+
start = time.Now()
574
691
if err := ds.CalcDiff(ctx, skipcids); err != nil {
575
692
return fmt.Errorf("failed while calculating mst diff (since=%v): %w", since, err)
576
-
577
693
}
694
+
calcDiffDuration.Observe(time.Since(start).Seconds())
578
695
579
696
evtops := make([]RepoOp, 0, len(ops))
580
697
···
631
748
}
632
749
}
633
750
751
+
start = time.Now()
634
752
rslice, err := ds.CloseWithRoot(ctx, root, nrev)
635
753
if err != nil {
636
754
return fmt.Errorf("close with root: %w", err)
637
755
}
756
+
writeCarSliceDuration.Observe(time.Since(start).Seconds())
638
757
639
758
if rm.events != nil {
640
759
rm.events(ctx, &RepoEvent{
···
829
948
ops := make([]RepoOp, 0, len(diffops))
830
949
for _, op := range diffops {
831
950
repoOpsImported.Inc()
832
-
out, err := processOp(ctx, bs, op, rm.hydrateRecords)
951
+
out, err := rm.processOp(ctx, bs, op, rm.hydrateRecords)
833
952
if err != nil {
834
-
log.Errorw("failed to process repo op", "err", err, "path", op.Rpath, "repo", repoDid)
953
+
rm.log.Error("failed to process repo op", "err", err, "path", op.Rpath, "repo", repoDid)
835
954
}
836
955
837
956
if out != nil {
···
865
984
return nil
866
985
}
867
986
868
-
func processOp(ctx context.Context, bs blockstore.Blockstore, op *mst.DiffOp, hydrateRecords bool) (*RepoOp, error) {
987
+
func (rm *RepoManager) processOp(ctx context.Context, bs blockstore.Blockstore, op *mst.DiffOp, hydrateRecords bool) (*RepoOp, error) {
869
988
parts := strings.SplitN(op.Rpath, "/", 2)
870
989
if len(parts) != 2 {
871
990
return nil, fmt.Errorf("repo mst had invalid rpath: %q", op.Rpath)
···
898
1017
return nil, err
899
1018
}
900
1019
901
-
log.Warnf("failed processing repo diff: %s", err)
1020
+
rm.log.Warn("failed processing repo diff", "err", err)
902
1021
} else {
903
1022
outop.Record = rec
904
1023
}
···
954
1073
// the repos lifecycle, this will end up erroneously not including
955
1074
// them. We should compute the set of blocks needed to read any repo
956
1075
// ops that happened in the commit and use that for our 'output' blocks
957
-
cids, err := walkTree(ctx, seen, root, membs, true)
1076
+
cids, err := rm.walkTree(ctx, seen, root, membs, true)
958
1077
if err != nil {
959
1078
return fmt.Errorf("walkTree: %w", err)
960
1079
}
···
995
1114
996
1115
// walkTree returns all cids linked recursively by the root, skipping any cids
997
1116
// in the 'skip' map, and not erroring on 'not found' if prevMissing is set
998
-
func walkTree(ctx context.Context, skip map[cid.Cid]bool, root cid.Cid, bs blockstore.Blockstore, prevMissing bool) ([]cid.Cid, error) {
1117
+
func (rm *RepoManager) walkTree(ctx context.Context, skip map[cid.Cid]bool, root cid.Cid, bs blockstore.Blockstore, prevMissing bool) ([]cid.Cid, error) {
999
1118
// TODO: what if someone puts non-cbor links in their repo?
1000
1119
if root.Prefix().Codec != cid.DagCBOR {
1001
1120
return nil, fmt.Errorf("can only handle dag-cbor objects in repos (%s is %d)", root, root.Prefix().Codec)
···
1009
1128
var links []cid.Cid
1010
1129
if err := cbg.ScanForLinks(bytes.NewReader(blk.RawData()), func(c cid.Cid) {
1011
1130
if c.Prefix().Codec == cid.Raw {
1012
-
log.Debugw("skipping 'raw' CID in record", "recordCid", root, "rawCid", c)
1131
+
rm.log.Debug("skipping 'raw' CID in record", "recordCid", root, "rawCid", c)
1013
1132
return
1014
1133
}
1015
1134
if skip[c] {
···
1029
1148
1030
1149
// TODO: should do this non-recursive since i expect these may get deep
1031
1150
for _, c := range links {
1032
-
sub, err := walkTree(ctx, skip, c, bs, prevMissing)
1151
+
sub, err := rm.walkTree(ctx, skip, c, bs, prevMissing)
1033
1152
if err != nil {
1034
1153
if prevMissing && !ipld.IsNotFound(err) {
1035
1154
return nil, err
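The archival/non-archival split above is driven entirely by which concrete carstore is handed to NewRepoManager: a *carstore.NonArchivalCarstore flips the new noArchive flag, which in turn selects handleExternalUserEventNoArchive. A hedged sketch of a caller choosing the mode follows; newRepoManagerForMode, cardb, cspath, and kmgr are illustrative names that are not part of this diff, and the constructor signatures follow the calls in testing/utils.go further down.

package example

import (
	"github.com/bluesky-social/indigo/carstore"
	"github.com/bluesky-social/indigo/repomgr"
	"gorm.io/gorm"
)

// newRepoManagerForMode is a sketch only: pick archival vs non-archival
// storage and let NewRepoManager detect the non-archival type.
func newRepoManagerForMode(cardb *gorm.DB, cspath string, kmgr repomgr.KeyManager, archive bool) (*repomgr.RepoManager, error) {
	var cs carstore.CarStore
	var err error
	if archive {
		cs, err = carstore.NewCarStore(cardb, []string{cspath}) // archival store now takes a slice of shard directories
	} else {
		cs, err = carstore.NewNonArchivalCarstore(cardb) // non-archival: enables the no-archive event path above
	}
	if err != nil {
		return nil, err
	}
	return repomgr.NewRepoManager(cs, kmgr), nil
}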
+1
search/firehose.go
+1
search/firehose.go
+1
-1
search/handlers.go
+1
-1
search/handlers.go
+3
-1
search/indexing.go
+3
-1
search/indexing.go
···
130
130
opts.SyncRequestsPerSecond = 8
131
131
}
132
132
133
-
opts.CheckoutPath = fmt.Sprintf("%s/xrpc/com.atproto.sync.getRepo", relayHTTP)
133
+
opts.RelayHost = relayHTTP
134
134
if config.IndexMaxConcurrency > 0 {
135
135
opts.ParallelRecordCreates = config.IndexMaxConcurrency
136
136
} else {
···
145
145
idx.handleDelete,
146
146
opts,
147
147
)
148
+
// reuse identity directory (for efficient caching)
149
+
bf.Directory = dir
148
150
149
151
idx.bfs = bfstore
150
152
idx.bf = bf
+1
-3
search/transform.go
+1
-3
search/transform.go
+16
splitter/metrics.go
+16
splitter/metrics.go
···
1
+
package splitter
2
+
3
+
import (
4
+
"github.com/prometheus/client_golang/prometheus"
5
+
"github.com/prometheus/client_golang/prometheus/promauto"
6
+
)
7
+
8
+
var eventsSentCounter = promauto.NewCounterVec(prometheus.CounterOpts{
9
+
Name: "spl_events_sent_counter",
10
+
Help: "The total number of events sent to consumers",
11
+
}, []string{"remote_addr", "user_agent"})
12
+
13
+
var activeClientGauge = promauto.NewGauge(prometheus.GaugeOpts{
14
+
Name: "spl_active_clients",
15
+
Help: "Current number of active clients",
16
+
})
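For context, a hedged sketch of how these two collectors are used by the consumer handling later in this diff; the label values are made-up examples, and the fragment assumes it runs inside package splitter.

// Per-consumer accounting, mirroring EventsHandler in splitter.go below.
sent := eventsSentCounter.WithLabelValues("203.0.113.7", "example-agent/1.0")
activeClientGauge.Inc()
defer activeClientGauge.Dec()
sent.Inc() // incremented once per event written to the consumer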
+144
splitter/ringbuf.go
+144
splitter/ringbuf.go
···
1
+
package splitter
2
+
3
+
import (
4
+
"context"
5
+
"sync"
6
+
7
+
events "github.com/bluesky-social/indigo/events"
8
+
"github.com/bluesky-social/indigo/models"
9
+
)
10
+
11
+
func NewEventRingBuffer(chunkSize, nchunks int) *EventRingBuffer {
12
+
return &EventRingBuffer{
13
+
chunkSize: chunkSize,
14
+
maxChunkCount: nchunks,
15
+
}
16
+
}
17
+
18
+
type EventRingBuffer struct {
19
+
lk sync.Mutex
20
+
chunks []*ringChunk
21
+
chunkSize int
22
+
maxChunkCount int
23
+
24
+
broadcast func(*events.XRPCStreamEvent)
25
+
}
26
+
27
+
type ringChunk struct {
28
+
lk sync.Mutex
29
+
buf []*events.XRPCStreamEvent
30
+
}
31
+
32
+
func (rc *ringChunk) append(evt *events.XRPCStreamEvent) {
33
+
rc.lk.Lock()
34
+
defer rc.lk.Unlock()
35
+
rc.buf = append(rc.buf, evt)
36
+
}
37
+
38
+
func (rc *ringChunk) events() []*events.XRPCStreamEvent {
39
+
rc.lk.Lock()
40
+
defer rc.lk.Unlock()
41
+
return rc.buf
42
+
}
43
+
44
+
func (er *EventRingBuffer) Persist(ctx context.Context, evt *events.XRPCStreamEvent) error {
45
+
er.lk.Lock()
46
+
defer er.lk.Unlock()
47
+
48
+
if len(er.chunks) == 0 {
49
+
er.chunks = []*ringChunk{new(ringChunk)}
50
+
}
51
+
52
+
last := er.chunks[len(er.chunks)-1]
53
+
if len(last.buf) >= er.chunkSize {
54
+
last = new(ringChunk)
55
+
er.chunks = append(er.chunks, last)
56
+
if len(er.chunks) > er.maxChunkCount {
57
+
er.chunks = er.chunks[1:]
58
+
}
59
+
}
60
+
61
+
last.append(evt)
62
+
63
+
er.broadcast(evt)
64
+
return nil
65
+
}
66
+
67
+
func (er *EventRingBuffer) Flush(context.Context) error {
68
+
return nil
69
+
}
70
+
71
+
func (er *EventRingBuffer) Playback(ctx context.Context, since int64, cb func(*events.XRPCStreamEvent) error) error {
72
+
// run playback a few times to get as close to 'live' as possible before returning
73
+
for i := 0; i < 10; i++ {
74
+
n, err := er.playbackRound(ctx, since, cb)
75
+
if err != nil {
76
+
return err
77
+
}
78
+
79
+
// playback had no new events
80
+
if n-since == 0 {
81
+
return nil
82
+
}
83
+
since = n
84
+
}
85
+
86
+
return nil
87
+
}
88
+
89
+
func (er *EventRingBuffer) playbackRound(ctx context.Context, since int64, cb func(*events.XRPCStreamEvent) error) (int64, error) {
90
+
// grab a snapshot of the current chunks
91
+
er.lk.Lock()
92
+
chunks := er.chunks
93
+
er.lk.Unlock()
94
+
95
+
i := len(chunks) - 1
96
+
for ; i >= 0; i-- {
97
+
c := chunks[i]
98
+
evts := c.events()
99
+
if since > events.SequenceForEvent(evts[len(evts)-1]) {
100
+
i++
101
+
break
102
+
}
103
+
}
104
+
if i < 0 {
105
+
i = 0
106
+
}
107
+
108
+
var lastSeq int64 = since
109
+
for _, c := range chunks[i:] {
110
+
var nread int
111
+
evts := c.events()
112
+
for nread < len(evts) {
113
+
for _, e := range evts[nread:] {
114
+
nread++
115
+
seq := events.SequenceForEvent(e)
116
+
if seq <= since {
117
+
continue
118
+
}
119
+
120
+
if err := cb(e); err != nil {
121
+
return 0, err
122
+
}
123
+
lastSeq = seq
124
+
}
125
+
126
+
// recheck evts buffer to see if more were added while we were here
127
+
evts = c.events()
128
+
}
129
+
}
130
+
131
+
return lastSeq, nil
132
+
}
133
+
134
+
func (er *EventRingBuffer) SetEventBroadcaster(brc func(*events.XRPCStreamEvent)) {
135
+
er.broadcast = brc
136
+
}
137
+
138
+
func (er *EventRingBuffer) Shutdown(context.Context) error {
139
+
return nil
140
+
}
141
+
142
+
func (er *EventRingBuffer) TakeDownRepo(context.Context, models.Uid) error {
143
+
return nil
144
+
}
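A short sketch of the ring buffer's intended role as the in-memory events persister, matching the wiring in NewSplitter below; the helper name is illustrative, and it assumes events.NewEventManager installs its broadcast callback on the persister it is given.

// Sketch, inside package splitter: back an EventManager with the ring buffer.
func newMemoryEventManager() *events.EventManager {
	erb := NewEventRingBuffer(20_000, 10_000) // per-chunk capacity, max chunk count
	return events.NewEventManager(erb)
}

Consumers then replay recent history via Playback(ctx, since, cb) and switch to live delivery through the manager's subscription, which is what EventsHandler does below.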
+675
splitter/splitter.go
+675
splitter/splitter.go
···
1
+
package splitter
2
+
3
+
import (
4
+
"bytes"
5
+
"context"
6
+
"encoding/json"
7
+
"errors"
8
+
"fmt"
9
+
"go.opentelemetry.io/otel"
10
+
"io"
11
+
"log/slog"
12
+
"math/rand"
13
+
"net"
14
+
"net/http"
15
+
"net/url"
16
+
"os"
17
+
"strconv"
18
+
"strings"
19
+
"sync"
20
+
"time"
21
+
22
+
"github.com/bluesky-social/indigo/api/atproto"
23
+
comatproto "github.com/bluesky-social/indigo/api/atproto"
24
+
"github.com/bluesky-social/indigo/bgs"
25
+
events "github.com/bluesky-social/indigo/events"
26
+
"github.com/bluesky-social/indigo/events/schedulers/sequential"
27
+
"github.com/bluesky-social/indigo/util"
28
+
"github.com/bluesky-social/indigo/xrpc"
29
+
"github.com/gorilla/websocket"
30
+
"github.com/labstack/echo/v4"
31
+
"github.com/labstack/echo/v4/middleware"
32
+
promclient "github.com/prometheus/client_golang/prometheus"
33
+
"github.com/prometheus/client_golang/prometheus/promhttp"
34
+
dto "github.com/prometheus/client_model/go"
35
+
)
36
+
37
+
type Splitter struct {
38
+
erb *EventRingBuffer
39
+
pp *events.PebblePersist
40
+
events *events.EventManager
41
+
42
+
// Management of Socket Consumers
43
+
consumersLk sync.RWMutex
44
+
nextConsumerID uint64
45
+
consumers map[uint64]*SocketConsumer
46
+
47
+
conf SplitterConfig
48
+
49
+
log *slog.Logger
50
+
51
+
httpC *http.Client
52
+
nextCrawlers []*url.URL
53
+
}
54
+
55
+
type SplitterConfig struct {
56
+
UpstreamHost string
57
+
CursorFile string
58
+
PebbleOptions *events.PebblePersistOptions
59
+
}
60
+
61
+
func (sc *SplitterConfig) XrpcRootUrl() string {
62
+
if strings.HasPrefix(sc.UpstreamHost, "http://") {
63
+
return sc.UpstreamHost
64
+
}
65
+
if strings.HasPrefix(sc.UpstreamHost, "https://") {
66
+
return sc.UpstreamHost
67
+
}
68
+
if strings.HasPrefix(sc.UpstreamHost, "ws://") {
69
+
return "http://" + sc.UpstreamHost[5:]
70
+
}
71
+
if strings.HasPrefix(sc.UpstreamHost, "wss://") {
72
+
return "https://" + sc.UpstreamHost[6:]
73
+
}
74
+
return "https://" + sc.UpstreamHost
75
+
}
76
+
77
+
func NewSplitter(conf SplitterConfig, nextCrawlers []string) (*Splitter, error) {
78
+
var nextCrawlerURLs []*url.URL
79
+
log := slog.Default().With("system", "splitter")
80
+
if len(nextCrawlers) > 0 {
81
+
nextCrawlerURLs = make([]*url.URL, len(nextCrawlers))
82
+
for i, tu := range nextCrawlers {
83
+
var err error
84
+
nextCrawlerURLs[i], err = url.Parse(tu)
85
+
if err != nil {
86
+
return nil, fmt.Errorf("failed to parse next-crawler url: %w", err)
87
+
}
88
+
log.Info("configuring relay for requestCrawl", "host", nextCrawlerURLs[i])
89
+
}
90
+
}
91
+
92
+
s := &Splitter{
93
+
conf: conf,
94
+
consumers: make(map[uint64]*SocketConsumer),
95
+
log: log,
96
+
httpC: util.RobustHTTPClient(),
97
+
nextCrawlers: nextCrawlerURLs,
98
+
}
99
+
100
+
if conf.PebbleOptions == nil {
101
+
// mem splitter
102
+
erb := NewEventRingBuffer(20_000, 10_000)
103
+
s.erb = erb
104
+
s.events = events.NewEventManager(erb)
105
+
} else {
106
+
pp, err := events.NewPebblePersistance(conf.PebbleOptions)
107
+
if err != nil {
108
+
return nil, err
109
+
}
110
+
go pp.GCThread(context.Background())
111
+
s.pp = pp
112
+
s.events = events.NewEventManager(pp)
113
+
}
114
+
115
+
return s, nil
116
+
}
117
+
func NewDiskSplitter(host, path string, persistHours float64, maxBytes int64) (*Splitter, error) {
118
+
ppopts := events.PebblePersistOptions{
119
+
DbPath: path,
120
+
PersistDuration: time.Duration(float64(time.Hour) * persistHours),
121
+
GCPeriod: 5 * time.Minute,
122
+
MaxBytes: uint64(maxBytes),
123
+
}
124
+
conf := SplitterConfig{
125
+
UpstreamHost: host,
126
+
CursorFile: "cursor-file",
127
+
PebbleOptions: &ppopts,
128
+
}
129
+
pp, err := events.NewPebblePersistance(&ppopts)
130
+
if err != nil {
131
+
return nil, err
132
+
}
133
+
134
+
go pp.GCThread(context.Background())
135
+
em := events.NewEventManager(pp)
136
+
return &Splitter{
137
+
conf: conf,
138
+
pp: pp,
139
+
events: em,
140
+
consumers: make(map[uint64]*SocketConsumer),
141
+
log: slog.Default().With("system", "splitter"),
142
+
}, nil
143
+
}
144
+
145
+
func (s *Splitter) Start(addr string) error {
146
+
var lc net.ListenConfig
147
+
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
148
+
defer cancel()
149
+
150
+
curs, err := s.getLastCursor()
151
+
if err != nil {
152
+
return fmt.Errorf("loading cursor failed: %w", err)
153
+
}
154
+
155
+
go s.subscribeWithRedialer(context.Background(), s.conf.UpstreamHost, curs)
156
+
157
+
li, err := lc.Listen(ctx, "tcp", addr)
158
+
if err != nil {
159
+
return err
160
+
}
161
+
return s.StartWithListener(li)
162
+
}
163
+
164
+
func (s *Splitter) StartMetrics(listen string) error {
165
+
http.Handle("/metrics", promhttp.Handler())
166
+
return http.ListenAndServe(listen, nil)
167
+
}
168
+
169
+
func (s *Splitter) Shutdown() error {
170
+
return nil
171
+
}
172
+
173
+
func (s *Splitter) StartWithListener(listen net.Listener) error {
174
+
e := echo.New()
175
+
e.HideBanner = true
176
+
177
+
e.Use(middleware.CORSWithConfig(middleware.CORSConfig{
178
+
AllowOrigins: []string{"*"},
179
+
AllowHeaders: []string{echo.HeaderOrigin, echo.HeaderContentType, echo.HeaderAccept, echo.HeaderAuthorization},
180
+
}))
181
+
182
+
/*
183
+
if !s.ssl {
184
+
e.Use(middleware.LoggerWithConfig(middleware.LoggerConfig{
185
+
Format: "method=${method}, uri=${uri}, status=${status} latency=${latency_human}\n",
186
+
}))
187
+
} else {
188
+
e.Use(middleware.LoggerWithConfig(middleware.DefaultLoggerConfig))
189
+
}
190
+
*/
191
+
192
+
e.Use(bgs.MetricsMiddleware)
193
+
194
+
e.HTTPErrorHandler = func(err error, ctx echo.Context) {
195
+
switch err := err.(type) {
196
+
case *echo.HTTPError:
197
+
if err2 := ctx.JSON(err.Code, map[string]any{
198
+
"error": err.Message,
199
+
}); err2 != nil {
200
+
s.log.Error("Failed to write http error", "err", err2)
201
+
}
202
+
default:
203
+
sendHeader := true
204
+
if ctx.Path() == "/xrpc/com.atproto.sync.subscribeRepos" {
205
+
sendHeader = false
206
+
}
207
+
208
+
s.log.Warn("HANDLER ERROR", "path", ctx.Path(), "err", err)
209
+
210
+
if strings.HasPrefix(ctx.Path(), "/admin/") {
211
+
ctx.JSON(500, map[string]any{
212
+
"error": err.Error(),
213
+
})
214
+
return
215
+
}
216
+
217
+
if sendHeader {
218
+
ctx.Response().WriteHeader(500)
219
+
}
220
+
}
221
+
}
222
+
223
+
// TODO: this API is temporary until we formalize what we want here
224
+
225
+
e.POST("/xrpc/com.atproto.sync.requestCrawl", s.RequestCrawlHandler)
226
+
e.GET("/xrpc/com.atproto.sync.subscribeRepos", s.EventsHandler)
227
+
e.GET("/xrpc/com.atproto.sync.listRepos", s.HandleComAtprotoSyncListRepos)
228
+
229
+
e.GET("/xrpc/_health", s.HandleHealthCheck)
230
+
e.GET("/_health", s.HandleHealthCheck)
231
+
e.GET("/", s.HandleHomeMessage)
232
+
233
+
// In order to support booting on random ports in tests, we need to tell the
234
+
// Echo instance it's already got a port, and then use its StartServer
235
+
// method to re-use that listener.
236
+
e.Listener = listen
237
+
srv := &http.Server{}
238
+
return e.StartServer(srv)
239
+
}
240
+
241
+
type HealthStatus struct {
242
+
Status string `json:"status"`
243
+
Message string `json:"msg,omitempty"`
244
+
}
245
+
246
+
func (s *Splitter) HandleHealthCheck(c echo.Context) error {
247
+
return c.JSON(200, HealthStatus{Status: "ok"})
248
+
}
249
+
250
+
var homeMessage string = `
251
+
          _      _
252
+
 _ _ __ _(_)_ _ | |__  _____ __ __
253
+
| '_/ _' | | ' \| '_ \/ _ \ V  V /
254
+
|_| \__,_|_|_||_|_.__/\___/\_/\_/
255
+
256
+
This is an atproto [https://atproto.com] firehose fanout service, running the 'rainbow' codebase [https://github.com/bluesky-social/indigo]
257
+
258
+
The firehose WebSocket path is at: /xrpc/com.atproto.sync.subscribeRepos
259
+
`
260
+
261
+
func (s *Splitter) HandleHomeMessage(c echo.Context) error {
262
+
return c.String(http.StatusOK, homeMessage)
263
+
}
264
+
265
+
type XRPCError struct {
266
+
Message string `json:"message"`
267
+
}
268
+
269
+
func (s *Splitter) RequestCrawlHandler(c echo.Context) error {
270
+
ctx := c.Request().Context()
271
+
var body comatproto.SyncRequestCrawl_Input
272
+
if err := c.Bind(&body); err != nil {
273
+
return c.JSON(http.StatusBadRequest, XRPCError{Message: fmt.Sprintf("invalid body: %s", err)})
274
+
}
275
+
276
+
host := body.Hostname
277
+
if host == "" {
278
+
return echo.NewHTTPError(http.StatusBadRequest, "must pass hostname")
279
+
}
280
+
281
+
if !strings.HasPrefix(host, "http://") && !strings.HasPrefix(host, "https://") {
282
+
host = "https://" + host
283
+
}
284
+
285
+
u, err := url.Parse(host)
286
+
if err != nil {
287
+
return echo.NewHTTPError(http.StatusBadRequest, "failed to parse hostname")
288
+
}
289
+
290
+
if u.Scheme == "http" {
291
+
return echo.NewHTTPError(http.StatusBadRequest, "this server requires https")
292
+
}
293
+
if u.Path != "" {
294
+
return echo.NewHTTPError(http.StatusBadRequest, "must pass hostname without path")
295
+
}
296
+
297
+
if u.Query().Encode() != "" {
298
+
return echo.NewHTTPError(http.StatusBadRequest, "must pass hostname without query")
299
+
}
300
+
301
+
host = u.Host // potentially hostname:port
302
+
303
+
clientHost := fmt.Sprintf("%s://%s", u.Scheme, host)
304
+
305
+
xrpcC := &xrpc.Client{
306
+
Host: clientHost,
307
+
Client: http.DefaultClient, // not using the client that auto-retries
308
+
}
309
+
310
+
desc, err := atproto.ServerDescribeServer(ctx, xrpcC)
311
+
if err != nil {
312
+
errMsg := fmt.Sprintf("requested host (%s) failed to respond to describe request", clientHost)
313
+
return echo.NewHTTPError(http.StatusBadRequest, errMsg)
314
+
}
315
+
316
+
// Maybe we could do something with this response later
317
+
_ = desc
318
+
319
+
if len(s.nextCrawlers) != 0 {
320
+
blob, err := json.Marshal(body)
321
+
if err != nil {
322
+
s.log.Warn("could not forward requestCrawl, json err", "err", err)
323
+
} else {
324
+
go func(bodyBlob []byte) {
325
+
for _, remote := range s.nextCrawlers {
326
+
if remote == nil {
327
+
continue
328
+
}
329
+
330
+
pu := remote.JoinPath("/xrpc/com.atproto.sync.requestCrawl")
331
+
response, err := s.httpC.Post(pu.String(), "application/json", bytes.NewReader(bodyBlob))
332
+
if response != nil && response.Body != nil {
333
+
response.Body.Close()
334
+
}
335
+
if err != nil || response == nil {
336
+
s.log.Warn("requestCrawl forward failed", "host", remote, "err", err)
337
+
} else if response.StatusCode != http.StatusOK {
338
+
s.log.Warn("requestCrawl forward failed", "host", remote, "status", response.Status)
339
+
} else {
340
+
s.log.Info("requestCrawl forward successful", "host", remote)
341
+
}
342
+
}
343
+
}(blob)
344
+
}
345
+
}
346
+
347
+
return c.JSON(200, HealthStatus{Status: "ok"})
348
+
}
349
+
350
+
func (s *Splitter) HandleComAtprotoSyncListRepos(c echo.Context) error {
351
+
ctx, span := otel.Tracer("server").Start(c.Request().Context(), "HandleComAtprotoSyncListRepos")
352
+
defer span.End()
353
+
354
+
cursorQuery := c.QueryParam("cursor")
355
+
limitQuery := c.QueryParam("limit")
356
+
357
+
var err error
358
+
359
+
limit := int64(500)
360
+
if limitQuery != "" {
361
+
limit, err = strconv.ParseInt(limitQuery, 10, 64)
362
+
if err != nil || limit < 1 || limit > 1000 {
363
+
return c.JSON(http.StatusBadRequest, XRPCError{Message: fmt.Sprintf("invalid limit: %s", limitQuery)})
364
+
}
365
+
}
366
+
367
+
client := xrpc.Client{
368
+
Client: s.httpC,
369
+
Host: s.conf.XrpcRootUrl(),
370
+
}
371
+
372
+
out, handleErr := atproto.SyncListRepos(ctx, &client, cursorQuery, limit)
373
+
if handleErr != nil {
374
+
return handleErr
375
+
}
376
+
return c.JSON(200, out)
377
+
}
378
+
379
+
func (s *Splitter) EventsHandler(c echo.Context) error {
380
+
var since *int64
381
+
if sinceVal := c.QueryParam("cursor"); sinceVal != "" {
382
+
sval, err := strconv.ParseInt(sinceVal, 10, 64)
383
+
if err != nil {
384
+
return err
385
+
}
386
+
since = &sval
387
+
}
388
+
389
+
ctx, cancel := context.WithCancel(c.Request().Context())
390
+
defer cancel()
391
+
392
+
// TODO: authhhh
393
+
conn, err := websocket.Upgrade(c.Response(), c.Request(), c.Response().Header(), 10<<10, 10<<10)
394
+
if err != nil {
395
+
return fmt.Errorf("upgrading websocket: %w", err)
396
+
}
397
+
398
+
lastWriteLk := sync.Mutex{}
399
+
lastWrite := time.Now()
400
+
401
+
// Start a goroutine to ping the client every 30 seconds to check if it's
402
+
// still alive. If the client doesn't respond to a ping within 5 seconds,
403
+
// we'll close the connection and teardown the consumer.
404
+
go func() {
405
+
ticker := time.NewTicker(30 * time.Second)
406
+
defer ticker.Stop()
407
+
408
+
for {
409
+
select {
410
+
case <-ticker.C:
411
+
lastWriteLk.Lock()
412
+
lw := lastWrite
413
+
lastWriteLk.Unlock()
414
+
415
+
if time.Since(lw) < 30*time.Second {
416
+
continue
417
+
}
418
+
419
+
if err := conn.WriteControl(websocket.PingMessage, []byte{}, time.Now().Add(5*time.Second)); err != nil {
420
+
s.log.Error("failed to ping client", "err", err)
421
+
cancel()
422
+
return
423
+
}
424
+
case <-ctx.Done():
425
+
return
426
+
}
427
+
}
428
+
}()
429
+
430
+
conn.SetPingHandler(func(message string) error {
431
+
err := conn.WriteControl(websocket.PongMessage, []byte(message), time.Now().Add(time.Second*60))
432
+
if err == websocket.ErrCloseSent {
433
+
return nil
434
+
} else if e, ok := err.(net.Error); ok && e.Temporary() {
435
+
return nil
436
+
}
437
+
return err
438
+
})
439
+
440
+
// Start a goroutine to read messages from the client and discard them.
441
+
go func() {
442
+
for {
443
+
_, _, err := conn.ReadMessage()
444
+
if err != nil {
445
+
s.log.Error("failed to read message from client", "err", err)
446
+
cancel()
447
+
return
448
+
}
449
+
}
450
+
}()
451
+
452
+
ident := c.RealIP() + "-" + c.Request().UserAgent()
453
+
454
+
evts, cleanup, err := s.events.Subscribe(ctx, ident, func(evt *events.XRPCStreamEvent) bool { return true }, since)
455
+
if err != nil {
456
+
return err
457
+
}
458
+
defer cleanup()
459
+
460
+
// Keep track of the consumer for metrics and admin endpoints
461
+
consumer := SocketConsumer{
462
+
RemoteAddr: c.RealIP(),
463
+
UserAgent: c.Request().UserAgent(),
464
+
ConnectedAt: time.Now(),
465
+
}
466
+
sentCounter := eventsSentCounter.WithLabelValues(consumer.RemoteAddr, consumer.UserAgent)
467
+
consumer.EventsSent = sentCounter
468
+
469
+
consumerID := s.registerConsumer(&consumer)
470
+
defer s.cleanupConsumer(consumerID)
471
+
472
+
s.log.Info("new consumer",
473
+
"remote_addr", consumer.RemoteAddr,
474
+
"user_agent", consumer.UserAgent,
475
+
"cursor", since,
476
+
"consumer_id", consumerID,
477
+
)
478
+
activeClientGauge.Inc()
479
+
defer activeClientGauge.Dec()
480
+
481
+
for {
482
+
select {
483
+
case evt, ok := <-evts:
484
+
if !ok {
485
+
s.log.Error("event stream closed unexpectedly")
486
+
return nil
487
+
}
488
+
489
+
wc, err := conn.NextWriter(websocket.BinaryMessage)
490
+
if err != nil {
491
+
s.log.Error("failed to get next writer", "err", err)
492
+
return err
493
+
}
494
+
495
+
if evt.Preserialized != nil {
496
+
_, err = wc.Write(evt.Preserialized)
497
+
} else {
498
+
err = evt.Serialize(wc)
499
+
}
500
+
if err != nil {
501
+
return fmt.Errorf("failed to write event: %w", err)
502
+
}
503
+
504
+
if err := wc.Close(); err != nil {
505
+
s.log.Warn("failed to flush-close our event write", "err", err)
506
+
return nil
507
+
}
508
+
509
+
lastWriteLk.Lock()
510
+
lastWrite = time.Now()
511
+
lastWriteLk.Unlock()
512
+
sentCounter.Inc()
513
+
case <-ctx.Done():
514
+
return nil
515
+
}
516
+
}
517
+
}
518
+
519
+
type SocketConsumer struct {
520
+
UserAgent string
521
+
RemoteAddr string
522
+
ConnectedAt time.Time
523
+
EventsSent promclient.Counter
524
+
}
525
+
526
+
func (s *Splitter) registerConsumer(c *SocketConsumer) uint64 {
527
+
s.consumersLk.Lock()
528
+
defer s.consumersLk.Unlock()
529
+
530
+
id := s.nextConsumerID
531
+
s.nextConsumerID++
532
+
533
+
s.consumers[id] = c
534
+
535
+
return id
536
+
}
537
+
538
+
func (s *Splitter) cleanupConsumer(id uint64) {
539
+
s.consumersLk.Lock()
540
+
defer s.consumersLk.Unlock()
541
+
542
+
c := s.consumers[id]
543
+
544
+
var m = &dto.Metric{}
545
+
if err := c.EventsSent.Write(m); err != nil {
546
+
s.log.Error("failed to get sent counter", "err", err)
547
+
}
548
+
549
+
s.log.Info("consumer disconnected",
550
+
"consumer_id", id,
551
+
"remote_addr", c.RemoteAddr,
552
+
"user_agent", c.UserAgent,
553
+
"events_sent", m.Counter.GetValue())
554
+
555
+
delete(s.consumers, id)
556
+
}
557
+
558
+
func sleepForBackoff(b int) time.Duration {
559
+
if b == 0 {
560
+
return 0
561
+
}
562
+
563
+
if b < 50 {
564
+
return time.Millisecond * time.Duration(rand.Intn(100)+(5*b))
565
+
}
566
+
567
+
return time.Second * 5
568
+
}
569
+
570
+
func (s *Splitter) subscribeWithRedialer(ctx context.Context, host string, cursor int64) {
571
+
d := websocket.Dialer{}
572
+
573
+
protocol := "wss"
574
+
575
+
var backoff int
576
+
for {
577
+
select {
578
+
case <-ctx.Done():
579
+
return
580
+
default:
581
+
}
582
+
583
+
header := http.Header{
584
+
"User-Agent": []string{"bgs-rainbow-v0"},
585
+
}
586
+
587
+
var url string
588
+
if cursor < 0 {
589
+
url = fmt.Sprintf("%s://%s/xrpc/com.atproto.sync.subscribeRepos", protocol, host)
590
+
} else {
591
+
url = fmt.Sprintf("%s://%s/xrpc/com.atproto.sync.subscribeRepos?cursor=%d", protocol, host, cursor)
592
+
}
593
+
con, res, err := d.DialContext(ctx, url, header)
594
+
if err != nil {
595
+
s.log.Warn("dialing failed", "host", host, "err", err, "backoff", backoff)
596
+
time.Sleep(sleepForBackoff(backoff))
597
+
backoff++
598
+
599
+
continue
600
+
}
601
+
602
+
s.log.Info("event subscription response", "code", res.StatusCode)
603
+
604
+
if err := s.handleConnection(ctx, host, con, &cursor); err != nil {
605
+
s.log.Warn("connection failed", "host", host, "err", err)
606
+
}
607
+
}
608
+
}
609
+
610
+
func (s *Splitter) handleConnection(ctx context.Context, host string, con *websocket.Conn, lastCursor *int64) error {
611
+
ctx, cancel := context.WithCancel(ctx)
612
+
defer cancel()
613
+
614
+
sched := sequential.NewScheduler("splitter", func(ctx context.Context, evt *events.XRPCStreamEvent) error {
615
+
seq := events.SequenceForEvent(evt)
616
+
if seq < 0 {
617
+
// ignore info events and other unsupported types
618
+
return nil
619
+
}
620
+
621
+
if err := s.events.AddEvent(ctx, evt); err != nil {
622
+
return err
623
+
}
624
+
625
+
if seq%5000 == 0 {
626
+
// TODO: don't need this after we move to getting seq from pebble
627
+
if err := s.writeCursor(seq); err != nil {
628
+
s.log.Error("write cursor failed", "err", err)
629
+
}
630
+
}
631
+
632
+
*lastCursor = seq
633
+
return nil
634
+
})
635
+
636
+
return events.HandleRepoStream(ctx, con, sched, nil)
637
+
}
638
+
639
+
func (s *Splitter) getLastCursor() (int64, error) {
640
+
if s.pp != nil {
641
+
seq, millis, _, err := s.pp.GetLast(context.Background())
642
+
if err == nil {
643
+
s.log.Debug("got last cursor from pebble", "seq", seq, "millis", millis)
644
+
return seq, nil
645
+
} else if errors.Is(err, events.ErrNoLast) {
646
+
s.log.Info("pebble no last")
647
+
} else {
648
+
s.log.Error("pebble seq fail", "err", err)
649
+
}
650
+
}
651
+
652
+
fi, err := os.Open(s.conf.CursorFile)
653
+
if err != nil {
654
+
if os.IsNotExist(err) {
655
+
return -1, nil
656
+
}
657
+
return -1, err
658
+
}
659
+
660
+
b, err := io.ReadAll(fi)
661
+
if err != nil {
662
+
return -1, err
663
+
}
664
+
665
+
v, err := strconv.ParseInt(string(b), 10, 64)
666
+
if err != nil {
667
+
return -1, err
668
+
}
669
+
670
+
return v, nil
671
+
}
672
+
673
+
func (s *Splitter) writeCursor(curs int64) error {
674
+
return os.WriteFile(s.conf.CursorFile, []byte(fmt.Sprint(curs)), 0664)
675
+
}
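Taken together, a hedged example of standing up the new splitter as a standalone process with the in-memory ring buffer (PebbleOptions left nil); the upstream host, ports, and cursor file name are placeholders.

package main

import (
	"log"

	"github.com/bluesky-social/indigo/splitter"
)

func main() {
	conf := splitter.SplitterConfig{
		UpstreamHost: "bsky.network",        // placeholder upstream relay host
		CursorFile:   "splitter-cursor.txt", // placeholder cursor file path
	}
	s, err := splitter.NewSplitter(conf, nil) // nil: no next-crawlers to forward requestCrawl to
	if err != nil {
		log.Fatal(err)
	}
	go func() {
		if err := s.StartMetrics(":2471"); err != nil { // example metrics port
			log.Fatal(err)
		}
	}()
	if err := s.Start(":2470"); err != nil { // example listen address
		log.Fatal(err)
	}
}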
+37
-12
testing/integ_test.go
+37
-12
testing/integ_test.go
···
15
15
"github.com/bluesky-social/indigo/repo"
16
16
"github.com/bluesky-social/indigo/xrpc"
17
17
"github.com/ipfs/go-cid"
18
-
"github.com/ipfs/go-log/v2"
19
18
car "github.com/ipld/go-car"
20
19
"github.com/stretchr/testify/assert"
21
20
)
22
21
23
-
func init() {
24
-
log.SetAllLoggers(log.LevelInfo)
22
+
func TestRelayBasic(t *testing.T) {
23
+
t.Helper()
24
+
testRelayBasic(t, true)
25
+
}
26
+
27
+
func TestRelayBasicNonArchive(t *testing.T) {
28
+
t.Helper()
29
+
testRelayBasic(t, false)
25
30
}
26
31
27
-
func TestRelayBasic(t *testing.T) {
32
+
func testRelayBasic(t *testing.T, archive bool) {
28
33
if testing.Short() {
29
34
t.Skip("skipping Relay test in 'short' test mode")
30
35
}
···
33
38
p1 := MustSetupPDS(t, ".tpds", didr)
34
39
p1.Run(t)
35
40
36
-
b1 := MustSetupRelay(t, didr)
41
+
b1 := MustSetupRelay(t, didr, archive)
37
42
b1.Run(t)
38
43
39
44
b1.tr.TrialHosts = []string{p1.RawHost()}
···
116
121
}
117
122
118
123
func TestRelayMultiPDS(t *testing.T) {
124
+
t.Helper()
125
+
testRelayMultiPDS(t, true)
126
+
}
127
+
128
+
func TestRelayMultiPDSNonArchive(t *testing.T) {
129
+
t.Helper()
130
+
testRelayMultiPDS(t, false)
131
+
}
132
+
133
+
func testRelayMultiPDS(t *testing.T, archive bool) {
119
134
if testing.Short() {
120
135
t.Skip("skipping Relay test in 'short' test mode")
121
136
}
···
130
145
p2 := MustSetupPDS(t, ".pdsdos", didr)
131
146
p2.Run(t)
132
147
133
-
b1 := MustSetupRelay(t, didr)
148
+
b1 := MustSetupRelay(t, didr, archive)
134
149
b1.Run(t)
135
150
136
151
b1.tr.TrialHosts = []string{p1.RawHost(), p2.RawHost()}
···
198
213
p2 := MustSetupPDS(t, ".pdsdos", didr)
199
214
p2.Run(t)
200
215
201
-
b1 := MustSetupRelay(t, didr)
216
+
b1 := MustSetupRelay(t, didr, true)
202
217
b1.Run(t)
203
218
204
219
b1.tr.TrialHosts = []string{p1.RawHost(), p2.RawHost()}
···
256
271
p1 := MustSetupPDS(t, ".pdsuno", didr)
257
272
p1.Run(t)
258
273
259
-
b1 := MustSetupRelay(t, didr)
274
+
b1 := MustSetupRelay(t, didr, true)
260
275
b1.Run(t)
261
276
262
277
b1.tr.TrialHosts = []string{p1.RawHost()}
···
293
308
p1 := MustSetupPDS(t, ".pdsuno", didr)
294
309
p1.Run(t)
295
310
296
-
b1 := MustSetupRelay(t, didr)
311
+
b1 := MustSetupRelay(t, didr, true)
297
312
b1.Run(t)
298
313
299
314
b1.tr.TrialHosts = []string{p1.RawHost()}
···
391
406
}
392
407
393
408
func TestRelayTakedown(t *testing.T) {
409
+
testRelayTakedown(t, true)
410
+
}
411
+
412
+
func TestRelayTakedownNonArchive(t *testing.T) {
413
+
testRelayTakedown(t, false)
414
+
}
415
+
416
+
func testRelayTakedown(t *testing.T, archive bool) {
394
417
if testing.Short() {
395
418
t.Skip("skipping Relay test in 'short' test mode")
396
419
}
···
401
424
p1 := MustSetupPDS(t, ".tpds", didr)
402
425
p1.Run(t)
403
426
404
-
b1 := MustSetupRelay(t, didr)
427
+
b1 := MustSetupRelay(t, didr, true)
405
428
b1.Run(t)
406
429
407
430
b1.tr.TrialHosts = []string{p1.RawHost()}
···
480
503
}
481
504
didr := TestPLC(t)
482
505
483
-
b1 := MustSetupRelay(t, didr)
506
+
b1 := MustSetupRelay(t, didr, true)
484
507
b1.Run(t)
485
508
486
509
b1.BanDomain(t, "foo.com")
···
523
546
p1 := MustSetupPDS(t, ".tpds", didr)
524
547
p1.Run(t)
525
548
526
-
b1 := MustSetupRelay(t, didr)
549
+
b1 := MustSetupRelay(t, didr, true)
527
550
b1.Run(t)
528
551
529
552
b1.tr.TrialHosts = []string{p1.RawHost()}
···
541
564
e1 := evts.Next()
542
565
assert.NotNil(e1.RepoCommit)
543
566
assert.Equal(e1.RepoCommit.Repo, bob.DID())
567
+
fmt.Println(e1.RepoCommit.Ops[0])
544
568
545
569
ctx := context.TODO()
546
570
rm := p1.server.Repoman()
···
549
573
}
550
574
551
575
e2 := evts.Next()
576
+
//fmt.Println(e2.RepoCommit.Ops[0])
552
577
assert.Equal(len(e2.RepoCommit.Ops), 0)
553
578
assert.Equal(e2.RepoCommit.Repo, bob.DID())
554
579
}
+19
-9
testing/utils.go
+19
-9
testing/utils.go
···
117
117
return nil, err
118
118
}
119
119
120
-
cs, err := carstore.NewCarStore(cardb, cspath)
120
+
cs, err := carstore.NewCarStore(cardb, []string{cspath})
121
121
if err != nil {
122
122
return nil, err
123
123
}
···
471
471
t.Helper()
472
472
473
473
ctx := context.TODO()
474
-
resp, err := bsky.NotificationListNotifications(ctx, u.client, "", 100, false, "")
474
+
resp, err := bsky.NotificationListNotifications(ctx, u.client, "", 100, false, nil, "")
475
475
if err != nil {
476
476
t.Fatal(err)
477
477
}
···
518
518
return t.listener.Addr().String()
519
519
}
520
520
521
-
func MustSetupRelay(t *testing.T, didr plc.PLCClient) *TestRelay {
521
+
func MustSetupRelay(t *testing.T, didr plc.PLCClient, archive bool) *TestRelay {
522
522
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
523
523
defer cancel()
524
-
tbgs, err := SetupRelay(ctx, didr)
524
+
tbgs, err := SetupRelay(ctx, didr, archive)
525
525
if err != nil {
526
526
t.Fatal(err)
527
527
}
···
529
529
return tbgs
530
530
}
531
531
532
-
func SetupRelay(ctx context.Context, didr plc.PLCClient) (*TestRelay, error) {
532
+
func SetupRelay(ctx context.Context, didr plc.PLCClient, archive bool) (*TestRelay, error) {
533
533
dir, err := os.MkdirTemp("", "integtest")
534
534
if err != nil {
535
535
return nil, err
···
550
550
return nil, err
551
551
}
552
552
553
-
cs, err := carstore.NewCarStore(cardb, cspath)
554
-
if err != nil {
555
-
return nil, err
553
+
var cs carstore.CarStore
554
+
if archive {
555
+
arccs, err := carstore.NewCarStore(cardb, []string{cspath})
556
+
if err != nil {
557
+
return nil, err
558
+
}
559
+
cs = arccs
560
+
} else {
561
+
nacs, err := carstore.NewNonArchivalCarstore(cardb)
562
+
if err != nil {
563
+
return nil, err
564
+
}
565
+
cs = nacs
556
566
}
557
567
558
568
//kmgr := indexer.NewKeyManager(didr, nil)
···
691
701
},
692
702
}
693
703
seqScheduler := sequential.NewScheduler("test", rsc.EventHandler)
694
-
if err := events.HandleRepoStream(ctx, con, seqScheduler); err != nil {
704
+
if err := events.HandleRepoStream(ctx, con, seqScheduler, nil); err != nil {
695
705
fmt.Println(err)
696
706
}
697
707
}()
+35
util/cliutil/ipfslog.go
+35
util/cliutil/ipfslog.go
···
1
+
package cliutil
2
+
3
+
import (
4
+
"io"
5
+
6
+
ipfslog "github.com/ipfs/go-log/v2"
7
+
"go.uber.org/zap/zapcore"
8
+
)
9
+
10
+
func SetIpfsWriter(out io.Writer, format string, level string) {
11
+
var ze zapcore.Encoder
12
+
switch format {
13
+
case "json":
14
+
ze = zapcore.NewJSONEncoder(zapcore.EncoderConfig{})
15
+
case "text":
16
+
ze = zapcore.NewConsoleEncoder(zapcore.EncoderConfig{})
17
+
default:
18
+
ze = zapcore.NewConsoleEncoder(zapcore.EncoderConfig{})
19
+
}
20
+
var zl zapcore.LevelEnabler
21
+
switch level {
22
+
case "debug":
23
+
zl = zapcore.DebugLevel
24
+
case "info":
25
+
zl = zapcore.InfoLevel
26
+
case "warn":
27
+
zl = zapcore.WarnLevel
28
+
case "error":
29
+
zl = zapcore.ErrorLevel
30
+
default:
31
+
zl = zapcore.InfoLevel
32
+
}
33
+
nc := zapcore.NewCore(ze, zapcore.AddSync(out), zl)
34
+
ipfslog.SetPrimaryCore(nc)
35
+
}
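A minimal usage sketch; SetupSlog in util.go below makes this same call with the writer, format, and level it configures for slog, so output from imported code still using ipfs/go-log lands in the same place.

// Route go-log output to stdout as JSON at warn level.
cliutil.SetIpfsWriter(os.Stdout, "json", "warn")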
+349
util/cliutil/util.go
+349
util/cliutil/util.go
···
2
2
3
3
import (
4
4
"encoding/json"
5
+
"errors"
5
6
"fmt"
7
+
"io"
8
+
"io/fs"
9
+
"log/slog"
6
10
"net/http"
7
11
"os"
8
12
"path/filepath"
13
+
"regexp"
14
+
"sort"
15
+
"strconv"
9
16
"strings"
10
17
"time"
11
18
···
230
237
231
238
return db, nil
232
239
}
240
+
241
+
type LogOptions struct {
242
+
// e.g. 1_000_000_000
243
+
LogRotateBytes int64
244
+
245
+
// path to write to, if rotating, %T gets UnixMilli at file open time
246
+
// NOTE: substitution is simple replace("%T", "")
247
+
LogPath string
248
+
249
+
// text|json
250
+
LogFormat string
251
+
252
+
// info|debug|warn|error
253
+
LogLevel string
254
+
255
+
// Keep N old logs (not including current); <0 disables removal, 0==remove all old log files immediately
256
+
KeepOld int
257
+
}
258
+
259
+
func firstenv(env_var_names ...string) string {
260
+
for _, env_var_name := range env_var_names {
261
+
val := os.Getenv(env_var_name)
262
+
if val != "" {
263
+
return val
264
+
}
265
+
}
266
+
return ""
267
+
}
268
+
269
+
// SetupSlog integrates passed in options and env vars.
270
+
//
271
+
// passing default cliutil.LogOptions{} is ok.
272
+
//
273
+
// BSKYLOG_LOG_LEVEL=info|debug|warn|error
274
+
//
275
+
// BSKYLOG_LOG_FMT=text|json
276
+
//
277
+
// BSKYLOG_FILE=path (or "-" or "" for stdout), %T gets UnixMilli; if a path with '/', {prefix}/current becomes a link to active log file
278
+
//
279
+
// BSKYLOG_ROTATE_BYTES=int maximum size of log chunk before rotating
280
+
//
281
+
// BSKYLOG_ROTATE_KEEP=int keep N old logs (not including current)
282
+
//
283
+
// The env vars were derived from ipfs logging library, and also respond to some GOLOG_ vars from that library,
284
+
// but BSKYLOG_ variables are preferred because imported code still using the ipfs log library may misbehave
285
+
// if some GOLOG values are set, especially GOLOG_FILE.
286
+
func SetupSlog(options LogOptions) (*slog.Logger, error) {
287
+
fmt.Fprintf(os.Stderr, "SetupSlog\n")
288
+
var hopts slog.HandlerOptions
289
+
hopts.Level = slog.LevelInfo
290
+
hopts.AddSource = true
291
+
if options.LogLevel == "" {
292
+
options.LogLevel = firstenv("BSKYLOG_LOG_LEVEL", "GOLOG_LOG_LEVEL")
293
+
}
294
+
if options.LogLevel == "" {
295
+
hopts.Level = slog.LevelInfo
296
+
options.LogLevel = "info"
297
+
} else {
298
+
level := strings.ToLower(options.LogLevel)
299
+
switch level {
300
+
case "debug":
301
+
hopts.Level = slog.LevelDebug
302
+
case "info":
303
+
hopts.Level = slog.LevelInfo
304
+
case "warn":
305
+
hopts.Level = slog.LevelWarn
306
+
case "error":
307
+
hopts.Level = slog.LevelError
308
+
default:
309
+
return nil, fmt.Errorf("unknown log level: %#v", options.LogLevel)
310
+
}
311
+
}
312
+
if options.LogFormat == "" {
313
+
options.LogFormat = firstenv("BSKYLOG_LOG_FMT", "GOLOG_LOG_FMT")
314
+
}
315
+
if options.LogFormat == "" {
316
+
options.LogFormat = "text"
317
+
} else {
318
+
format := strings.ToLower(options.LogFormat)
319
+
if format == "json" || format == "text" {
320
+
// ok
321
+
} else {
322
+
return nil, fmt.Errorf("invalid log format: %#v", options.LogFormat)
323
+
}
324
+
options.LogFormat = format
325
+
}
326
+
327
+
if options.LogPath == "" {
328
+
options.LogPath = firstenv("BSKYLOG_FILE", "GOLOG_FILE")
329
+
}
330
+
if options.LogRotateBytes == 0 {
331
+
rotateBytesStr := os.Getenv("BSKYLOG_ROTATE_BYTES") // no GOLOG equivalent
332
+
if rotateBytesStr != "" {
333
+
rotateBytes, err := strconv.ParseInt(rotateBytesStr, 10, 64)
334
+
if err != nil {
335
+
return nil, fmt.Errorf("invalid BSKYLOG_ROTATE_BYTES value: %w", err)
336
+
}
337
+
options.LogRotateBytes = rotateBytes
338
+
}
339
+
}
340
+
if options.KeepOld == 0 {
341
+
keepOldUnset := true
342
+
keepOldStr := os.Getenv("BSKYLOG_ROTATE_KEEP") // no GOLOG equivalent
343
+
if keepOldStr != "" {
344
+
keepOld, err := strconv.ParseInt(keepOldStr, 10, 64)
345
+
if err != nil {
346
+
return nil, fmt.Errorf("invalid BSKYLOG_ROTATE_KEEP value: %w", err)
347
+
}
348
+
keepOldUnset = false
349
+
options.KeepOld = int(keepOld)
350
+
}
351
+
if keepOldUnset {
352
+
options.KeepOld = 2
353
+
}
354
+
}
355
+
logaround := make(chan string, 100)
356
+
go logbouncer(logaround)
357
+
var out io.Writer
358
+
if (options.LogPath == "") || (options.LogPath == "-") {
359
+
out = os.Stdout
360
+
} else if options.LogRotateBytes != 0 {
361
+
out = &logRotateWriter{
362
+
rotateBytes: options.LogRotateBytes,
363
+
outPathTemplate: options.LogPath,
364
+
keep: options.KeepOld,
365
+
logaround: logaround,
366
+
}
367
+
} else {
368
+
var err error
369
+
out, err = os.Create(options.LogPath)
370
+
if err != nil {
371
+
return nil, fmt.Errorf("%s: %w", options.LogPath, err)
372
+
}
373
+
fmt.Fprintf(os.Stderr, "SetupSlog create %#v\n", options.LogPath)
374
+
}
375
+
var handler slog.Handler
376
+
switch options.LogFormat {
377
+
case "text":
378
+
handler = slog.NewTextHandler(out, &hopts)
379
+
case "json":
380
+
handler = slog.NewJSONHandler(out, &hopts)
381
+
default:
382
+
return nil, fmt.Errorf("unknown log format: %#v", options.LogFormat)
383
+
}
384
+
logger := slog.New(handler)
385
+
slog.SetDefault(logger)
386
+
templateDirPart, _ := filepath.Split(options.LogPath)
387
+
ents, _ := os.ReadDir(templateDirPart)
388
+
for _, ent := range ents {
389
+
fmt.Fprintf(os.Stdout, "%s\n", filepath.Join(templateDirPart, ent.Name()))
390
+
}
391
+
SetIpfsWriter(out, options.LogFormat, options.LogLevel)
392
+
return logger, nil
393
+
}
394
+
395
+
type logRotateWriter struct {
396
+
currentWriter io.WriteCloser
397
+
398
+
// how much has been written to current log file
399
+
currentBytes int64
400
+
401
+
// e.g. path/to/logs/foo%T
402
+
currentPath string
403
+
404
+
// e.g. path/to/logs/current
405
+
currentPathCurrent string
406
+
407
+
rotateBytes int64
408
+
409
+
outPathTemplate string
410
+
411
+
// keep the most recent N log files (not including current)
412
+
keep int
413
+
414
+
// write strings to this from inside the log system, a task outside the log system hands them to slog.Info()
415
+
logaround chan<- string
416
+
}
417
+
418
+
func logbouncer(out <-chan string) {
419
+
var logger *slog.Logger
420
+
for line := range out {
421
+
fmt.Fprintf(os.Stderr, "ll %s\n", line)
422
+
if logger == nil {
423
+
// lazy to make sure it crops up after slog Default has been set
424
+
logger = slog.Default().With("system", "logging")
425
+
}
426
+
logger.Info(line)
427
+
}
428
+
}
429
+
430
+
var currentMatcher = regexp.MustCompile("current_\\d+")
431
+
432
+
func (w *logRotateWriter) cleanOldLogs() {
433
+
if w.keep < 0 {
434
+
// old log removal is disabled
435
+
return
436
+
}
437
+
// w.currentPath was recently set as the new log
438
+
dirpart, _ := filepath.Split(w.currentPath)
439
+
// find old logs
440
+
templateDirPart, templateNamePart := filepath.Split(w.outPathTemplate)
441
+
if dirpart != templateDirPart {
442
+
w.logaround <- fmt.Sprintf("current dir part %#v != template dir part %#v\n", w.currentPath, w.outPathTemplate)
443
+
return
444
+
}
445
+
// build a regexp that is string literal parts with \d+ replacing the UnixMilli part
446
+
templateNameParts := strings.Split(templateNamePart, "%T")
447
+
var sb strings.Builder
448
+
first := true
449
+
for _, part := range templateNameParts {
450
+
if first {
451
+
first = false
452
+
} else {
453
+
sb.WriteString("\\d+")
454
+
}
455
+
sb.WriteString(regexp.QuoteMeta(part))
456
+
}
457
+
tmre, err := regexp.Compile(sb.String())
458
+
if err != nil {
459
+
w.logaround <- fmt.Sprintf("failed to compile old log template regexp: %#v\n", err)
460
+
return
461
+
}
462
+
dir, err := os.ReadDir(dirpart)
463
+
if err != nil {
464
+
w.logaround <- fmt.Sprintf("failed to read old log template dir: %#v\n", err)
465
+
return
466
+
}
467
+
var found []fs.FileInfo
468
+
for _, ent := range dir {
469
+
name := ent.Name()
470
+
if tmre.MatchString(name) || currentMatcher.MatchString(name) {
471
+
fi, err := ent.Info()
472
+
if err != nil {
473
+
continue
474
+
}
475
+
found = append(found, fi)
476
+
}
477
+
}
478
+
if len(found) <= w.keep {
479
+
// not too many, nothing to do
480
+
return
481
+
}
482
+
foundMtimeLess := func(i, j int) bool {
483
+
return found[i].ModTime().Before(found[j].ModTime())
484
+
}
485
+
sort.Slice(found, foundMtimeLess)
486
+
drops := found[:len(found)-w.keep]
487
+
for _, fi := range drops {
488
+
fullpath := filepath.Join(dirpart, fi.Name())
489
+
err = os.Remove(fullpath)
490
+
if err != nil {
491
+
w.logaround <- fmt.Sprintf("failed to rm old log: %#v\n", err)
492
+
// but keep going
493
+
}
494
+
// maybe it would be safe to debug-log old log removal from within the logging infrastructure?
495
+
}
496
+
}
497
+
498
+
func (w *logRotateWriter) closeOldLog() []error {
499
+
if w.currentWriter == nil {
500
+
return nil
501
+
}
502
+
var earlyWeakErrors []error
503
+
err := w.currentWriter.Close()
504
+
if err != nil {
505
+
earlyWeakErrors = append(earlyWeakErrors, err)
506
+
}
507
+
w.currentWriter = nil
508
+
w.currentBytes = 0
509
+
w.currentPath = ""
510
+
if w.currentPathCurrent != "" {
511
+
err = os.Remove(w.currentPathCurrent) // not really an error until something else goes wrong
512
+
if err != nil {
513
+
earlyWeakErrors = append(earlyWeakErrors, err)
514
+
}
515
+
w.currentPathCurrent = ""
516
+
}
517
+
return earlyWeakErrors
518
+
}
519
+
520
+
func (w *logRotateWriter) openNewLog(earlyWeakErrors []error) (badErr error, weakErrors []error) {
521
+
nowMillis := time.Now().UnixMilli()
522
+
nows := strconv.FormatInt(nowMillis, 10)
523
+
w.currentPath = strings.Replace(w.outPathTemplate, "%T", nows, -1)
524
+
var err error
525
+
w.currentWriter, err = os.Create(w.currentPath)
526
+
if err != nil {
527
+
earlyWeakErrors = append(earlyWeakErrors, err)
528
+
return errors.Join(earlyWeakErrors...), nil
529
+
}
530
+
w.logaround <- fmt.Sprintf("new log file %#v", w.currentPath)
531
+
w.cleanOldLogs()
532
+
dirpart, _ := filepath.Split(w.currentPath)
533
+
if dirpart != "" {
534
+
w.currentPathCurrent = filepath.Join(dirpart, "current")
535
+
fi, err := os.Stat(w.currentPathCurrent)
536
+
if err == nil && fi.Mode().IsRegular() {
537
+
// move aside unknown "current" from a previous run
538
+
// see also currentMatcher regexp current_\d+
539
+
err = os.Rename(w.currentPathCurrent, w.currentPathCurrent+"_"+nows)
540
+
if err != nil {
541
+
// not crucial if we can't move aside "current"
542
+
// TODO: log warning ... but not from inside log writer?
543
+
earlyWeakErrors = append(earlyWeakErrors, err)
544
+
}
545
+
}
546
+
err = os.Link(w.currentPath, w.currentPathCurrent)
547
+
if err != nil {
548
+
// not crucial if we can't make "current" link
549
+
// TODO: log warning ... but not from inside log writer?
550
+
earlyWeakErrors = append(earlyWeakErrors, err)
551
+
}
552
+
}
553
+
return nil, earlyWeakErrors
554
+
}
555
+
556
+
func (w *logRotateWriter) Write(p []byte) (n int, err error) {
557
+
var earlyWeakErrors []error
558
+
if int64(len(p))+w.currentBytes > w.rotateBytes {
559
+
// next write would be over the limit
560
+
earlyWeakErrors = w.closeOldLog()
561
+
}
562
+
if w.currentWriter == nil {
563
+
// start new log file
564
+
var err error
565
+
err, earlyWeakErrors = w.openNewLog(earlyWeakErrors)
566
+
if err != nil {
567
+
return 0, err
568
+
}
569
+
}
570
+
var wrote int
571
+
wrote, err = w.currentWriter.Write(p)
572
+
w.currentBytes += int64(wrote)
573
+
if err != nil {
574
+
earlyWeakErrors = append(earlyWeakErrors, err)
575
+
return wrote, errors.Join(earlyWeakErrors...)
576
+
}
577
+
if earlyWeakErrors != nil {
578
+
w.logaround <- fmt.Sprintf("ok, but: %s", errors.Join(earlyWeakErrors...).Error())
579
+
}
580
+
return wrote, nil
581
+
}
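Finally, a hedged example of configuring logging through the new helper; the values shown are illustrative, and an empty LogOptions{} falls back to the BSKYLOG_*/GOLOG_* environment variables documented above.

package main

import (
	"log/slog"

	"github.com/bluesky-social/indigo/util/cliutil"
)

func main() {
	logger, err := cliutil.SetupSlog(cliutil.LogOptions{
		LogLevel:       "info",
		LogFormat:      "json",
		LogPath:        "logs/app%T.log", // %T becomes UnixMilli at open time; the logs/ dir must already exist
		LogRotateBytes: 1_000_000_000,    // rotate at roughly 1 GB
		KeepOld:        2,                // keep two rotated files besides "current"
	})
	if err != nil {
		panic(err)
	}
	logger.Info("logging configured")
	slog.Info("SetupSlog also installs the logger as the slog default")
}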