fork of indigo with slightly nicer lexgen
at main 4.1 kB view raw
1package rules 2 3import ( 4 "log/slog" 5 "net/url" 6 "strings" 7 "unicode" 8 9 appbsky "github.com/bluesky-social/indigo/api/bsky" 10 "github.com/bluesky-social/indigo/atproto/syntax" 11 "github.com/bluesky-social/indigo/automod" 12 "github.com/bluesky-social/indigo/automod/helpers" 13) 14 15func isMisleadingURLFacet(facet helpers.PostFacet, logger *slog.Logger) bool { 16 linkURL, err := url.Parse(*facet.URL) 17 if err != nil { 18 logger.Warn("invalid link metadata URL", "url", facet.URL) 19 return false 20 } 21 22 // basic text string pre-cleanups 23 text := strings.ToLower(strings.TrimSpace(facet.Text)) 24 25 // remove square brackets 26 if strings.HasPrefix(text, "[") && strings.HasSuffix(text, "]") { 27 text = text[1 : len(text)-1] 28 } 29 30 // truncated and not an obvious prefix hack (TODO: more special domains? regex?) 31 if strings.HasSuffix(text, "...") && !strings.HasSuffix(text, ".com...") && !strings.HasSuffix(text, ".org...") { 32 return false 33 } 34 if strings.HasSuffix(text, "…") && !strings.HasSuffix(text, ".com…") && !strings.HasSuffix(text, ".org…") { 35 return false 36 } 37 38 // remove any other truncation suffix 39 text = strings.TrimSuffix(strings.TrimSuffix(text, "..."), "…") 40 41 if len(text) == 0 { 42 logger.Warn("empty facet text", "text", facet.Text) 43 return false 44 } 45 46 // if really not-a-domain, just skip 47 if !strings.Contains(text, ".") { 48 return false 49 } 50 51 // hostnames can't start with a digit (eg, arxiv or DOI links) 52 for _, c := range text[0:1] { 53 if unicode.IsNumber(c) { 54 return false 55 } 56 } 57 58 // try to fix any missing method in the text 59 if !strings.Contains(text, "://") { 60 text = "https://" + text 61 } 62 63 // try parsing as a full URL (with whitespace trimmed) 64 textURL, err := url.Parse(text) 65 if err != nil { 66 logger.Warn("invalid link text URL", "url", facet.Text) 67 return false 68 } 69 70 // for now just compare domains to handle the most obvious cases 71 // this public code will obviously get discovered and bypassed. this doesn't earn you any security cred! 72 linkHost := strings.TrimPrefix(strings.ToLower(linkURL.Host), "www.") 73 textHost := strings.TrimPrefix(strings.ToLower(textURL.Host), "www.") 74 if textHost != linkHost { 75 logger.Warn("misleading mismatched domains", "linkHost", linkURL.Host, "textHost", textURL.Host, "text", facet.Text) 76 return true 77 } 78 return false 79} 80 81var _ automod.PostRuleFunc = MisleadingURLPostRule 82 83func MisleadingURLPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { 84 // TODO: make this an InSet() config? 85 if c.Account.Identity.Handle == "nowbreezing.ntw.app" { 86 return nil 87 } 88 facets, err := helpers.ExtractFacets(post) 89 if err != nil { 90 c.Logger.Warn("invalid facets", "err", err) 91 // TODO: or some other "this record is corrupt" indicator? 92 //c.AddRecordFlag("broken-post") 93 return nil 94 } 95 for _, facet := range facets { 96 if facet.URL != nil { 97 if isMisleadingURLFacet(facet, c.Logger) { 98 c.AddRecordFlag("misleading-link") 99 c.Notify("slack") 100 } 101 } 102 } 103 return nil 104} 105 106var _ automod.PostRuleFunc = MisleadingMentionPostRule 107 108func MisleadingMentionPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { 109 facets, err := helpers.ExtractFacets(post) 110 if err != nil { 111 c.Logger.Warn("invalid facets", "err", err) 112 // TODO: or some other "this record is corrupt" indicator? 113 //c.AddRecordFlag("broken-post") 114 return nil 115 } 116 for _, facet := range facets { 117 if facet.DID != nil { 118 txt := facet.Text 119 if txt[0] == '@' { 120 txt = txt[1:] 121 } 122 handle, err := syntax.ParseHandle(strings.ToLower(txt)) 123 if err != nil { 124 c.Logger.Warn("mention was not a valid handle", "text", txt) 125 continue 126 } 127 128 mentioned, err := c.Directory().LookupHandle(c.Ctx, handle) 129 if err != nil { 130 c.Logger.Warn("could not resolve handle", "handle", handle) 131 c.AddRecordFlag("broken-mention") 132 c.Notify("slack") 133 break 134 } 135 136 // TODO: check if mentioned DID was recently updated? might be a caching issue 137 if mentioned.DID.String() != *facet.DID { 138 c.Logger.Warn("misleading mention", "text", txt, "did", facet.DID) 139 c.AddRecordFlag("misleading-mention") 140 c.Notify("slack") 141 continue 142 } 143 } 144 } 145 return nil 146}