Weighs the soul of incoming HTTP requests to stop AI crawlers
at main 7.8 kB view raw
1package lib 2 3import ( 4 "fmt" 5 "math/rand" 6 "net/http" 7 "regexp" 8 "slices" 9 "strings" 10 "time" 11 12 "github.com/TecharoHQ/anubis" 13 "github.com/TecharoHQ/anubis/internal" 14 "github.com/TecharoHQ/anubis/lib/challenge" 15 "github.com/TecharoHQ/anubis/lib/localization" 16 "github.com/TecharoHQ/anubis/lib/policy" 17 "github.com/TecharoHQ/anubis/web" 18 "github.com/a-h/templ" 19 "github.com/golang-jwt/jwt/v5" 20 "golang.org/x/net/publicsuffix" 21) 22 23var domainMatchRegexp = regexp.MustCompile(`^((xn--)?[a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,}$`) 24 25type CookieOpts struct { 26 Value string 27 Host string 28 Path string 29 Name string 30 Expiry time.Duration 31} 32 33func (s *Server) SetCookie(w http.ResponseWriter, cookieOpts CookieOpts) { 34 var domain = s.opts.CookieDomain 35 var name = anubis.CookieName 36 var path = "/" 37 if cookieOpts.Name != "" { 38 name = cookieOpts.Name 39 } 40 if cookieOpts.Path != "" { 41 path = cookieOpts.Path 42 } 43 if s.opts.CookieDynamicDomain && domainMatchRegexp.MatchString(cookieOpts.Host) { 44 if etld, err := publicsuffix.EffectiveTLDPlusOne(cookieOpts.Host); err == nil { 45 domain = etld 46 } 47 } 48 49 if cookieOpts.Expiry == 0 { 50 cookieOpts.Expiry = s.opts.CookieExpiration 51 } 52 53 http.SetCookie(w, &http.Cookie{ 54 Name: name, 55 Value: cookieOpts.Value, 56 Expires: time.Now().Add(cookieOpts.Expiry), 57 SameSite: http.SameSiteNoneMode, 58 Domain: domain, 59 Secure: s.opts.CookieSecure, 60 Partitioned: s.opts.CookiePartitioned, 61 Path: path, 62 }) 63} 64 65func (s *Server) ClearCookie(w http.ResponseWriter, cookieOpts CookieOpts) { 66 var domain = s.opts.CookieDomain 67 var name = anubis.CookieName 68 var path = "/" 69 if cookieOpts.Name != "" { 70 name = cookieOpts.Name 71 } 72 if cookieOpts.Path != "" { 73 path = cookieOpts.Path 74 } 75 if s.opts.CookieDynamicDomain && domainMatchRegexp.MatchString(cookieOpts.Host) { 76 if etld, err := publicsuffix.EffectiveTLDPlusOne(cookieOpts.Host); err == nil { 77 domain = etld 78 } 79 } 80 81 http.SetCookie(w, &http.Cookie{ 82 Name: name, 83 Value: "", 84 MaxAge: -1, 85 Expires: time.Now().Add(-1 * time.Minute), 86 SameSite: http.SameSiteNoneMode, 87 Partitioned: s.opts.CookiePartitioned, 88 Domain: domain, 89 Secure: s.opts.CookieSecure, 90 Path: path, 91 }) 92} 93 94// https://github.com/oauth2-proxy/oauth2-proxy/blob/master/pkg/upstream/http.go#L124 95type UnixRoundTripper struct { 96 Transport *http.Transport 97} 98 99// set bare minimum stuff 100func (t UnixRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { 101 req = req.Clone(req.Context()) 102 if req.Host == "" { 103 req.Host = "localhost" 104 } 105 req.URL.Host = req.Host // proxy error: no Host in request URL 106 req.URL.Scheme = "http" // make http.Transport happy and avoid an infinite recursion 107 return t.Transport.RoundTrip(req) 108} 109 110func randomChance(n int) bool { 111 return rand.Intn(n) == 0 112} 113 114func (s *Server) RenderIndex(w http.ResponseWriter, r *http.Request, rule *policy.Bot, returnHTTPStatusOnly bool) { 115 localizer := localization.GetLocalizer(r) 116 117 if returnHTTPStatusOnly { 118 w.WriteHeader(http.StatusUnauthorized) 119 w.Write([]byte(localizer.T("authorization_required"))) 120 return 121 } 122 123 lg := internal.GetRequestLogger(r) 124 125 if !strings.Contains(r.Header.Get("Accept-Encoding"), "gzip") && randomChance(64) { 126 lg.Error("client was given a challenge but does not in fact support gzip compression") 127 s.respondWithError(w, r, localizer.T("client_error_browser")) 128 } 129 130 challengesIssued.WithLabelValues("embedded").Add(1) 131 chall, err := s.challengeFor(r) 132 if err != nil { 133 lg.Error("can't get challenge", "err", "err") 134 s.respondWithError(w, r, fmt.Sprintf("%s: %s", localizer.T("internal_server_error"), rule.Challenge.Algorithm)) 135 return 136 } 137 138 var ogTags map[string]string = nil 139 if s.opts.OpenGraph.Enabled { 140 var err error 141 ogTags, err = s.OGTags.GetOGTags(r.Context(), r.URL, r.Host) 142 if err != nil { 143 lg.Error("failed to get OG tags", "err", err) 144 } 145 } 146 147 s.SetCookie(w, CookieOpts{ 148 Value: chall.ID, 149 Host: r.Host, 150 Path: "/", 151 Name: anubis.TestCookieName, 152 Expiry: 30 * time.Minute, 153 }) 154 155 impl, ok := challenge.Get(rule.Challenge.Algorithm) 156 if !ok { 157 lg.Error("check failed", "err", "can't get algorithm", "algorithm", rule.Challenge.Algorithm) 158 s.respondWithError(w, r, fmt.Sprintf("%s: %s", localizer.T("internal_server_error"), rule.Challenge.Algorithm)) 159 return 160 } 161 162 in := &challenge.IssueInput{ 163 Impressum: s.policy.Impressum, 164 Rule: rule, 165 Challenge: chall, 166 OGTags: ogTags, 167 Store: s.store, 168 } 169 170 component, err := impl.Issue(r, lg, in) 171 if err != nil { 172 lg.Error("[unexpected] render failed, please open an issue", "err", err) // This is likely a bug in the template. Should never be triggered as CI tests for this. 173 s.respondWithError(w, r, fmt.Sprintf("%s \"RenderIndex\"", localizer.T("internal_server_error"))) 174 return 175 } 176 177 handler := internal.GzipMiddleware(1, internal.NoStoreCache(templ.Handler( 178 component, 179 templ.WithStatus(s.opts.Policy.StatusCodes.Challenge), 180 ))) 181 handler.ServeHTTP(w, r) 182} 183 184func (s *Server) RenderBench(w http.ResponseWriter, r *http.Request) { 185 localizer := localization.GetLocalizer(r) 186 187 templ.Handler( 188 web.Base(localizer.T("benchmarking_anubis"), web.Bench(localizer), s.policy.Impressum, localizer), 189 ).ServeHTTP(w, r) 190} 191 192func (s *Server) respondWithError(w http.ResponseWriter, r *http.Request, message string) { 193 s.respondWithStatus(w, r, message, http.StatusInternalServerError) 194} 195 196func (s *Server) respondWithStatus(w http.ResponseWriter, r *http.Request, msg string, status int) { 197 localizer := localization.GetLocalizer(r) 198 199 templ.Handler(web.Base(localizer.T("oh_noes"), web.ErrorPage(msg, s.opts.WebmasterEmail, localizer), s.policy.Impressum, localizer), templ.WithStatus(status)).ServeHTTP(w, r) 200} 201 202func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) { 203 s.mux.ServeHTTP(w, r) 204} 205 206func (s *Server) stripBasePrefixFromRequest(r *http.Request) *http.Request { 207 if !s.opts.StripBasePrefix || s.opts.BasePrefix == "" { 208 return r 209 } 210 211 basePrefix := strings.TrimSuffix(s.opts.BasePrefix, "/") 212 path := r.URL.Path 213 214 if !strings.HasPrefix(path, basePrefix) { 215 return r 216 } 217 218 trimmedPath := strings.TrimPrefix(path, basePrefix) 219 if trimmedPath == "" { 220 trimmedPath = "/" 221 } 222 223 // Clone the request and URL 224 reqCopy := r.Clone(r.Context()) 225 urlCopy := *r.URL 226 urlCopy.Path = trimmedPath 227 reqCopy.URL = &urlCopy 228 229 return reqCopy 230} 231 232func (s *Server) ServeHTTPNext(w http.ResponseWriter, r *http.Request) { 233 if s.next == nil { 234 localizer := localization.GetLocalizer(r) 235 236 redir := r.FormValue("redir") 237 urlParsed, err := r.URL.Parse(redir) 238 if err != nil { 239 s.respondWithStatus(w, r, localizer.T("redirect_not_parseable"), http.StatusBadRequest) 240 return 241 } 242 243 if (len(urlParsed.Host) > 0 && len(s.opts.RedirectDomains) != 0 && !slices.Contains(s.opts.RedirectDomains, urlParsed.Host)) || urlParsed.Host != r.URL.Host { 244 s.respondWithStatus(w, r, localizer.T("redirect_domain_not_allowed"), http.StatusBadRequest) 245 return 246 } 247 248 if redir != "" { 249 http.Redirect(w, r, redir, http.StatusFound) 250 return 251 } 252 253 templ.Handler( 254 web.Base(localizer.T("you_are_not_a_bot"), web.StaticHappy(localizer), s.policy.Impressum, localizer), 255 ).ServeHTTP(w, r) 256 } else { 257 requestsProxied.WithLabelValues(r.Host).Inc() 258 r = s.stripBasePrefixFromRequest(r) 259 s.next.ServeHTTP(w, r) 260 } 261} 262 263func (s *Server) signJWT(claims jwt.MapClaims) (string, error) { 264 claims["iat"] = time.Now().Unix() 265 claims["nbf"] = time.Now().Add(-1 * time.Minute).Unix() 266 claims["exp"] = time.Now().Add(s.opts.CookieExpiration).Unix() 267 268 if len(s.hs512Secret) == 0 { 269 return jwt.NewWithClaims(jwt.SigningMethodEdDSA, claims).SignedString(s.ed25519Priv) 270 } else { 271 return jwt.NewWithClaims(jwt.SigningMethodHS512, claims).SignedString(s.hs512Secret) 272 } 273}