fork of indigo with slightly nicer lexgen

refactor rules to use new helpers package

+6 -5
automod/rules/harassment.go
··· 8 8 "github.com/bluesky-social/indigo/atproto/syntax" 9 9 "github.com/bluesky-social/indigo/automod" 10 10 "github.com/bluesky-social/indigo/automod/countstore" 11 + "github.com/bluesky-social/indigo/automod/helpers" 11 12 ) 12 13 13 14 var _ automod.PostRuleFunc = HarassmentTargetInteractionPostRule 14 15 15 16 // looks for new accounts, which interact with frequently-harassed accounts, and report them for review 16 17 func HarassmentTargetInteractionPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { 17 - if c.Account.Identity == nil || !AccountIsYoungerThan(&c.AccountContext, 24*time.Hour) { 18 + if c.Account.Identity == nil || !helpers.AccountIsYoungerThan(&c.AccountContext, 24*time.Hour) { 18 19 return nil 19 20 } 20 21 21 22 var interactionDIDs []string 22 - facets, err := ExtractFacets(post) 23 + facets, err := helpers.ExtractFacets(post) 23 24 if err != nil { 24 25 return err 25 26 } ··· 28 29 interactionDIDs = append(interactionDIDs, *pf.DID) 29 30 } 30 31 } 31 - if post.Reply != nil && !IsSelfThread(c, post) { 32 + if post.Reply != nil && !helpers.IsSelfThread(c, post) { 32 33 parentURI, err := syntax.ParseATURI(post.Reply.Parent.Uri) 33 34 if err != nil { 34 35 return err ··· 57 58 return nil 58 59 } 59 60 60 - interactionDIDs = dedupeStrings(interactionDIDs) 61 + interactionDIDs = helpers.DedupeStrings(interactionDIDs) 61 62 for _, d := range interactionDIDs { 62 63 did, err := syntax.ParseDID(d) 63 64 if err != nil { ··· 114 115 115 116 // looks for new accounts, which frequently post the same type of content 116 117 func HarassmentTrivialPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { 117 - if c.Account.Identity == nil || !AccountIsYoungerThan(&c.AccountContext, 7*24*time.Hour) { 118 + if c.Account.Identity == nil || !helpers.AccountIsYoungerThan(&c.AccountContext, 7*24*time.Hour) { 118 119 return nil 119 120 } 120 121
+4 -3
automod/rules/hashtags.go
··· 5 5 6 6 appbsky "github.com/bluesky-social/indigo/api/bsky" 7 7 "github.com/bluesky-social/indigo/automod" 8 + "github.com/bluesky-social/indigo/automod/helpers" 8 9 "github.com/bluesky-social/indigo/automod/keyword" 9 10 ) 10 11 11 12 // looks for specific hashtags from known lists 12 13 func BadHashtagsPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { 13 - for _, tag := range ExtractHashtagsPost(post) { 14 - tag = NormalizeHashtag(tag) 14 + for _, tag := range helpers.ExtractHashtagsPost(post) { 15 + tag = helpers.NormalizeHashtag(tag) 15 16 // skip some bad-word hashtags which frequently false-positive 16 17 if tag == "nazi" || tag == "hitler" { 17 18 continue ··· 35 36 36 37 // if a post is "almost all" hashtags, it might be a form of search spam 37 38 func TooManyHashtagsPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { 38 - tags := ExtractHashtagsPost(post) 39 + tags := helpers.ExtractHashtagsPost(post) 39 40 tagChars := 0 40 41 for _, tag := range tags { 41 42 tagChars += len(tag)
+2 -1
automod/rules/identity.go
··· 7 7 8 8 "github.com/bluesky-social/indigo/automod" 9 9 "github.com/bluesky-social/indigo/automod/countstore" 10 + "github.com/bluesky-social/indigo/automod/helpers" 10 11 ) 11 12 12 13 // triggers on first identity event for an account (DID) 13 14 func NewAccountRule(c *automod.AccountContext) error { 14 - if c.Account.Identity == nil || !AccountIsYoungerThan(c, 4*time.Hour) { 15 + if c.Account.Identity == nil || !helpers.AccountIsYoungerThan(c, 4*time.Hour) { 15 16 return nil 16 17 } 17 18
+5 -4
automod/rules/keyword.go
··· 7 7 8 8 appbsky "github.com/bluesky-social/indigo/api/bsky" 9 9 "github.com/bluesky-social/indigo/automod" 10 + "github.com/bluesky-social/indigo/automod/helpers" 10 11 "github.com/bluesky-social/indigo/automod/keyword" 11 12 ) 12 13 ··· 17 18 isJapanese = true 18 19 } 19 20 } 20 - for _, tok := range ExtractTextTokensPost(post) { 21 + for _, tok := range helpers.ExtractTextTokensPost(post) { 21 22 word := keyword.SlugIsExplicitSlur(tok) 22 23 // used very frequently in a reclaimed context 23 24 if word != "" && word != "faggot" && word != "tranny" && word != "coon" && !(word == "kike" && isJapanese) { ··· 54 55 //c.Notify("slack") 55 56 } 56 57 } 57 - for _, tok := range ExtractTextTokensProfile(profile) { 58 + for _, tok := range helpers.ExtractTextTokensProfile(profile) { 58 59 // de-pluralize 59 60 tok = strings.TrimSuffix(tok, "s") 60 61 if c.InSet("worst-words", tok) { ··· 71 72 72 73 // looks for the specific harassment situation of a replay to another user with only a single word 73 74 func ReplySingleBadWordPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { 74 - if post.Reply != nil && !IsSelfThread(c, post) { 75 - tokens := ExtractTextTokensPost(post) 75 + if post.Reply != nil && !helpers.IsSelfThread(c, post) { 76 + tokens := helpers.ExtractTextTokensPost(post) 76 77 if len(tokens) != 1 { 77 78 return nil 78 79 }
+2 -1
automod/rules/mentions.go
··· 8 8 "github.com/bluesky-social/indigo/atproto/syntax" 9 9 "github.com/bluesky-social/indigo/automod" 10 10 "github.com/bluesky-social/indigo/automod/countstore" 11 + "github.com/bluesky-social/indigo/automod/helpers" 11 12 ) 12 13 13 14 var _ automod.PostRuleFunc = DistinctMentionsRule ··· 47 48 var _ automod.PostRuleFunc = YoungAccountDistinctMentionsRule 48 49 49 50 func YoungAccountDistinctMentionsRule(c *automod.RecordContext, post *appbsky.FeedPost) error { 50 - if c.Account.Identity == nil || !AccountIsYoungerThan(&c.AccountContext, 14*24*time.Hour) { 51 + if c.Account.Identity == nil || !helpers.AccountIsYoungerThan(&c.AccountContext, 14*24*time.Hour) { 51 52 return nil 52 53 } 53 54
+4 -3
automod/rules/misleading.go
··· 9 9 appbsky "github.com/bluesky-social/indigo/api/bsky" 10 10 "github.com/bluesky-social/indigo/atproto/syntax" 11 11 "github.com/bluesky-social/indigo/automod" 12 + "github.com/bluesky-social/indigo/automod/helpers" 12 13 ) 13 14 14 - func isMisleadingURLFacet(facet PostFacet, logger *slog.Logger) bool { 15 + func isMisleadingURLFacet(facet helpers.PostFacet, logger *slog.Logger) bool { 15 16 linkURL, err := url.Parse(*facet.URL) 16 17 if err != nil { 17 18 logger.Warn("invalid link metadata URL", "url", facet.URL) ··· 84 85 if c.Account.Identity.Handle == "nowbreezing.ntw.app" { 85 86 return nil 86 87 } 87 - facets, err := ExtractFacets(post) 88 + facets, err := helpers.ExtractFacets(post) 88 89 if err != nil { 89 90 c.Logger.Warn("invalid facets", "err", err) 90 91 // TODO: or some other "this record is corrupt" indicator? ··· 105 106 var _ automod.PostRuleFunc = MisleadingMentionPostRule 106 107 107 108 func MisleadingMentionPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { 108 - facets, err := ExtractFacets(post) 109 + facets, err := helpers.ExtractFacets(post) 109 110 if err != nil { 110 111 c.Logger.Warn("invalid facets", "err", err) 111 112 // TODO: or some other "this record is corrupt" indicator?
+11 -10
automod/rules/misleading_test.go
··· 11 11 "github.com/bluesky-social/indigo/atproto/syntax" 12 12 "github.com/bluesky-social/indigo/automod" 13 13 "github.com/bluesky-social/indigo/automod/engine" 14 + "github.com/bluesky-social/indigo/automod/helpers" 14 15 15 16 "github.com/stretchr/testify/assert" 16 17 ) ··· 118 119 logger := slog.Default() 119 120 120 121 fixtures := []struct { 121 - facet PostFacet 122 + facet helpers.PostFacet 122 123 out bool 123 124 }{ 124 125 { 125 - facet: PostFacet{ 126 + facet: helpers.PostFacet{ 126 127 Text: "https://atproto.com", 127 128 URL: pstr("https://atproto.com"), 128 129 }, 129 130 out: false, 130 131 }, 131 132 { 132 - facet: PostFacet{ 133 + facet: helpers.PostFacet{ 133 134 Text: "https://atproto.com", 134 135 URL: pstr("https://evil.com"), 135 136 }, 136 137 out: true, 137 138 }, 138 139 { 139 - facet: PostFacet{ 140 + facet: helpers.PostFacet{ 140 141 Text: "https://www.atproto.com", 141 142 URL: pstr("https://atproto.com"), 142 143 }, 143 144 out: false, 144 145 }, 145 146 { 146 - facet: PostFacet{ 147 + facet: helpers.PostFacet{ 147 148 Text: "https://atproto.com", 148 149 URL: pstr("https://www.atproto.com"), 149 150 }, 150 151 out: false, 151 152 }, 152 153 { 153 - facet: PostFacet{ 154 + facet: helpers.PostFacet{ 154 155 Text: "[example.com]", 155 156 URL: pstr("https://www.example.com"), 156 157 }, 157 158 out: false, 158 159 }, 159 160 { 160 - facet: PostFacet{ 161 + facet: helpers.PostFacet{ 161 162 Text: "example.com...", 162 163 URL: pstr("https://example.com.evil.com"), 163 164 }, 164 165 out: true, 165 166 }, 166 167 { 167 - facet: PostFacet{ 168 + facet: helpers.PostFacet{ 168 169 Text: "ATPROTO.com...", 169 170 URL: pstr("https://atproto.com"), 170 171 }, 171 172 out: false, 172 173 }, 173 174 { 174 - facet: PostFacet{ 175 + facet: helpers.PostFacet{ 175 176 Text: "1234.5678", 176 177 URL: pstr("https://arxiv.org/abs/1234.5678"), 177 178 }, 178 179 out: false, 179 180 }, 180 181 { 181 - facet: PostFacet{ 182 + facet: helpers.PostFacet{ 182 183 Text: "www.techdirt.com…", 183 184 URL: pstr("https://www.techdirt.com/"), 184 185 },
+2 -1
automod/rules/nostr.go
··· 7 7 8 8 appbsky "github.com/bluesky-social/indigo/api/bsky" 9 9 "github.com/bluesky-social/indigo/automod" 10 + "github.com/bluesky-social/indigo/automod/helpers" 10 11 ) 11 12 12 13 var _ automod.PostRuleFunc = NostrSpamPostRule 13 14 14 15 // looks for new accounts, which frequently post the same type of content 15 16 func NostrSpamPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { 16 - if c.Account.Identity == nil || !AccountIsYoungerThan(&c.AccountContext, 2*24*time.Hour) { 17 + if c.Account.Identity == nil || !helpers.AccountIsYoungerThan(&c.AccountContext, 2*24*time.Hour) { 17 18 return nil 18 19 } 19 20
+5 -4
automod/rules/promo.go
··· 9 9 appbsky "github.com/bluesky-social/indigo/api/bsky" 10 10 "github.com/bluesky-social/indigo/automod" 11 11 "github.com/bluesky-social/indigo/automod/countstore" 12 + "github.com/bluesky-social/indigo/automod/helpers" 12 13 ) 13 14 14 15 var _ automod.PostRuleFunc = AggressivePromotionRule ··· 17 18 // 18 19 // this rule depends on ReplyCountPostRule() to set counts 19 20 func AggressivePromotionRule(c *automod.RecordContext, post *appbsky.FeedPost) error { 20 - if c.Account.Identity == nil || !AccountIsYoungerThan(&c.AccountContext, 7*24*time.Hour) { 21 + if c.Account.Identity == nil || !helpers.AccountIsYoungerThan(&c.AccountContext, 7*24*time.Hour) { 21 22 return nil 22 23 } 23 - if post.Reply == nil || IsSelfThread(c, post) { 24 + if post.Reply == nil || helpers.IsSelfThread(c, post) { 24 25 return nil 25 26 } 26 27 27 - allURLs := ExtractTextURLs(post.Text) 28 + allURLs := helpers.ExtractTextURLs(post.Text) 28 29 if c.Account.Profile.Description != nil { 29 - profileURLs := ExtractTextURLs(*c.Account.Profile.Description) 30 + profileURLs := helpers.ExtractTextURLs(*c.Account.Profile.Description) 30 31 allURLs = append(allURLs, profileURLs...) 31 32 } 32 33 hasPromo := false
+3 -2
automod/rules/quick.go
··· 7 7 8 8 appbsky "github.com/bluesky-social/indigo/api/bsky" 9 9 "github.com/bluesky-social/indigo/automod" 10 + "github.com/bluesky-social/indigo/automod/helpers" 10 11 ) 11 12 12 13 var botLinkStrings = []string{"ainna13762491", "LINK押して", "→ https://tiny", "⇒ http://tiny"} ··· 54 55 var _ automod.IdentityRuleFunc = NewAccountBotEmailRule 55 56 56 57 func NewAccountBotEmailRule(c *automod.AccountContext) error { 57 - if c.Account.Identity == nil || !AccountIsYoungerThan(c, 1*time.Hour) { 58 + if c.Account.Identity == nil || !helpers.AccountIsYoungerThan(c, 1*time.Hour) { 58 59 return nil 59 60 } 60 61 ··· 73 74 74 75 // looks for new accounts, which frequently post the same type of content 75 76 func TrivialSpamPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { 76 - if c.Account.Identity == nil || !AccountIsYoungerThan(&c.AccountContext, 8*24*time.Hour) { 77 + if c.Account.Identity == nil || !helpers.AccountIsYoungerThan(&c.AccountContext, 8*24*time.Hour) { 77 78 return nil 78 79 } 79 80
+13 -12
automod/rules/replies.go
··· 9 9 "github.com/bluesky-social/indigo/atproto/syntax" 10 10 "github.com/bluesky-social/indigo/automod" 11 11 "github.com/bluesky-social/indigo/automod/countstore" 12 + "github.com/bluesky-social/indigo/automod/helpers" 12 13 ) 13 14 14 15 var _ automod.PostRuleFunc = ReplyCountPostRule 15 16 16 17 // does not count "self-replies" (direct to self, or in own post thread) 17 18 func ReplyCountPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { 18 - if post.Reply == nil || IsSelfThread(c, post) { 19 + if post.Reply == nil || helpers.IsSelfThread(c, post) { 19 20 return nil 20 21 } 21 22 ··· 47 48 // 48 49 // There can be legitimate situations that trigger this rule, so in most situations should be a "report" not "label" action. 49 50 func IdenticalReplyPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { 50 - if post.Reply == nil || IsSelfThread(c, post) { 51 + if post.Reply == nil || helpers.IsSelfThread(c, post) { 51 52 return nil 52 53 } 53 54 ··· 55 56 if utf8.RuneCountInString(post.Text) <= 10 { 56 57 return nil 57 58 } 58 - if AccountIsOlderThan(&c.AccountContext, 14*24*time.Hour) { 59 + if helpers.AccountIsOlderThan(&c.AccountContext, 14*24*time.Hour) { 59 60 return nil 60 61 } 61 62 62 63 // don't count if there is a follow-back relationship 63 - if ParentOrRootIsFollower(c, post) { 64 + if helpers.ParentOrRootIsFollower(c, post) { 64 65 return nil 65 66 } 66 67 67 68 // increment before read. use a specific period (IncrementPeriod()) to reduce the number of counters (one per unique post text) 68 69 period := countstore.PeriodDay 69 - bucket := c.Account.Identity.DID.String() + "/" + HashOfString(post.Text) 70 + bucket := c.Account.Identity.DID.String() + "/" + helpers.HashOfString(post.Text) 70 71 c.IncrementPeriod("reply-text", bucket, period) 71 72 72 73 count := c.GetCount("reply-text", bucket, period) ··· 91 92 var _ automod.PostRuleFunc = IdenticalReplyPostSameParentRule 92 93 93 94 func IdenticalReplyPostSameParentRule(c *automod.RecordContext, post *appbsky.FeedPost) error { 94 - if post.Reply == nil || IsSelfThread(c, post) { 95 + if post.Reply == nil || helpers.IsSelfThread(c, post) { 95 96 return nil 96 97 } 97 98 98 - if ParentOrRootIsFollower(c, post) { 99 + if helpers.ParentOrRootIsFollower(c, post) { 99 100 return nil 100 101 } 101 102 102 103 postCount := c.Account.PostsCount 103 - if AccountIsOlderThan(&c.AccountContext, identicalReplySameParentMaxAge) || postCount >= identicalReplySameParentMaxPosts { 104 + if helpers.AccountIsOlderThan(&c.AccountContext, identicalReplySameParentMaxAge) || postCount >= identicalReplySameParentMaxPosts { 104 105 return nil 105 106 } 106 107 107 108 period := countstore.PeriodHour 108 - bucket := c.Account.Identity.DID.String() + "/" + post.Reply.Parent.Uri + "/" + HashOfString(post.Text) 109 + bucket := c.Account.Identity.DID.String() + "/" + post.Reply.Parent.Uri + "/" + helpers.HashOfString(post.Text) 109 110 c.IncrementPeriod("reply-text-same-post", bucket, period) 110 111 111 112 count := c.GetCount("reply-text-same-post", bucket, period) ··· 126 127 127 128 func YoungAccountDistinctRepliesRule(c *automod.RecordContext, post *appbsky.FeedPost) error { 128 129 // only replies, and skip self-replies (eg, threads) 129 - if post.Reply == nil || IsSelfThread(c, post) { 130 + if post.Reply == nil || helpers.IsSelfThread(c, post) { 130 131 return nil 131 132 } 132 133 ··· 134 135 if utf8.RuneCountInString(post.Text) <= 10 { 135 136 return nil 136 137 } 137 - if AccountIsOlderThan(&c.AccountContext, 14*24*time.Hour) { 138 + if helpers.AccountIsOlderThan(&c.AccountContext, 14*24*time.Hour) { 138 139 return nil 139 140 } 140 141 141 142 // don't count if there is a follow-back relationship 142 - if ParentOrRootIsFollower(c, post) { 143 + if helpers.ParentOrRootIsFollower(c, post) { 143 144 return nil 144 145 } 145 146
+2 -1
automod/rules/reposts.go
··· 7 7 8 8 "github.com/bluesky-social/indigo/automod" 9 9 "github.com/bluesky-social/indigo/automod/countstore" 10 + "github.com/bluesky-social/indigo/automod/helpers" 10 11 ) 11 12 12 13 var dailyRepostThresholdWithoutPost = 30 ··· 18 19 // looks for accounts which do frequent reposts 19 20 func TooManyRepostRule(c *automod.RecordContext) error { 20 21 // Don't bother checking reposts from accounts older than 30 days 21 - if c.Account.Identity == nil || !AccountIsYoungerThan(&c.AccountContext, 30*24*time.Hour) { 22 + if c.Account.Identity == nil || !helpers.AccountIsYoungerThan(&c.AccountContext, 30*24*time.Hour) { 22 23 return nil 23 24 } 24 25
+2 -2
automod/visual/hiveai_rule.go
··· 5 5 "time" 6 6 7 7 "github.com/bluesky-social/indigo/automod" 8 - "github.com/bluesky-social/indigo/automod/rules" 8 + "github.com/bluesky-social/indigo/automod/helpers" 9 9 lexutil "github.com/bluesky-social/indigo/lex/util" 10 10 ) 11 11 ··· 43 43 44 44 for _, l := range labels { 45 45 // NOTE: experimenting with profile reporting for new accounts 46 - if l == "sexual" && c.RecordOp.Collection.String() == "app.bsky.actor.profile" && rules.AccountIsYoungerThan(&c.AccountContext, 2*24*time.Hour) { 46 + if l == "sexual" && c.RecordOp.Collection.String() == "app.bsky.actor.profile" && helpers.AccountIsYoungerThan(&c.AccountContext, 2*24*time.Hour) { 47 47 c.ReportRecord(automod.ReportReasonSexual, "possible sexual profile (not labeled yet)") 48 48 c.Logger.Info("skipping record label", "label", l, "reason", "sexual-profile-experiment") 49 49 } else {