grain.social is a photo sharing platform built on atproto.

feat: block the robots

Changed files
+102
services
nginx
src
+4
services/nginx/nginx.conf
··· 25 25 listen 80; 26 26 server_name *.grain.social; 27 27 28 + if ($http_user_agent ~* "(AI2Bot|Ai2Bot\-Dolma|aiHitBot|Amazonbot|Andibot|anthropic\-ai|Applebot|Applebot\-Extended|bedrockbot|Brightbot\ 1\.0|Bytespider|CCBot|ChatGPT\-User|Claude\-SearchBot|Claude\-User|Claude\-Web|ClaudeBot|cohere\-ai|cohere\-training\-data\-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|EchoboxBot|FacebookBot|facebookexternalhit|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google\-CloudVertexBot|Google\-Extended|GoogleOther|GoogleOther\-Image|GoogleOther\-Video|GPTBot|iaskspider/2\.0|ICC\-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo\ Bot|meta\-externalagent|Meta\-ExternalAgent|meta\-externalfetcher|Meta\-ExternalFetcher|MistralAI\-User/1\.0|MyCentralAIScraperBot|NovaAct|OAI\-SearchBot|omgili|omgilibot|Operator|PanguBot|Panscient|panscient\.com|Perplexity\-User|PerplexityBot|PetalBot|PhindBot|Poseidon\ Research\ Crawler|QualifiedBot|QuillBot|quillbot\.com|SBIntuitionsBot|Scrapy|SemrushBot|SemrushBot\-BA|SemrushBot\-CT|SemrushBot\-OCOB|SemrushBot\-SI|SemrushBot\-SWA|Sidetrade\ indexer\ bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio\-Extended|wpbot|YandexAdditional|YandexAdditionalBot|YouBot)") { 29 + return 403; 30 + } 31 + 28 32 location /xrpc { 29 33 proxy_pass $pds; 30 34 proxy_set_header Host $host;
+2
src/main.tsx
··· 18 18 import { handler as onboardHandler } from "./routes/onboard.tsx"; 19 19 import { handler as profileHandler } from "./routes/profile.tsx"; 20 20 import { handler as recordHandler } from "./routes/record.ts"; 21 + import { handler as robotsHandler } from "./routes/robots.tsx"; 21 22 import { handler as supportHandler } from "./routes/support.tsx"; 22 23 import { handler as timelineHandler } from "./routes/timeline.tsx"; 23 24 import { handler as uploadHandler } from "./routes/upload.tsx"; ··· 131 132 route("/actions/get-blob", ["GET"], actions.getBlob), 132 133 ...comments, 133 134 route("/:did/:collection/:rkey", recordHandler), 135 + route("/robots.txt", robotsHandler), 134 136 ], 135 137 });
+96
src/routes/robots.tsx
··· 1 + import { RouteHandler } from "@bigmoves/bff"; 2 + 3 + export const handler: RouteHandler = ( 4 + _req, 5 + _params, 6 + _ctx, 7 + ) => { 8 + return new Response(robots, { 9 + headers: { 10 + "Content-Type": "text/plain", 11 + }, 12 + }); 13 + }; 14 + 15 + const robots = `User-agent: AI2Bot 16 + User-agent: Ai2Bot-Dolma 17 + User-agent: aiHitBot 18 + User-agent: Amazonbot 19 + User-agent: Andibot 20 + User-agent: anthropic-ai 21 + User-agent: Applebot 22 + User-agent: Applebot-Extended 23 + User-agent: bedrockbot 24 + User-agent: Brightbot 1.0 25 + User-agent: Bytespider 26 + User-agent: CCBot 27 + User-agent: ChatGPT-User 28 + User-agent: Claude-SearchBot 29 + User-agent: Claude-User 30 + User-agent: Claude-Web 31 + User-agent: ClaudeBot 32 + User-agent: cohere-ai 33 + User-agent: cohere-training-data-crawler 34 + User-agent: Cotoyogi 35 + User-agent: Crawlspace 36 + User-agent: Diffbot 37 + User-agent: DuckAssistBot 38 + User-agent: EchoboxBot 39 + User-agent: FacebookBot 40 + User-agent: facebookexternalhit 41 + User-agent: Factset_spyderbot 42 + User-agent: FirecrawlAgent 43 + User-agent: FriendlyCrawler 44 + User-agent: Google-CloudVertexBot 45 + User-agent: Google-Extended 46 + User-agent: GoogleOther 47 + User-agent: GoogleOther-Image 48 + User-agent: GoogleOther-Video 49 + User-agent: GPTBot 50 + User-agent: iaskspider/2.0 51 + User-agent: ICC-Crawler 52 + User-agent: ImagesiftBot 53 + User-agent: img2dataset 54 + User-agent: ISSCyberRiskCrawler 55 + User-agent: Kangaroo Bot 56 + User-agent: meta-externalagent 57 + User-agent: Meta-ExternalAgent 58 + User-agent: meta-externalfetcher 59 + User-agent: Meta-ExternalFetcher 60 + User-agent: MistralAI-User/1.0 61 + User-agent: MyCentralAIScraperBot 62 + User-agent: NovaAct 63 + User-agent: OAI-SearchBot 64 + User-agent: omgili 65 + User-agent: omgilibot 66 + User-agent: Operator 67 + User-agent: PanguBot 68 + User-agent: Panscient 69 + User-agent: panscient.com 70 + User-agent: Perplexity-User 71 + User-agent: PerplexityBot 72 + User-agent: PetalBot 73 + User-agent: PhindBot 74 + User-agent: Poseidon Research Crawler 75 + User-agent: QualifiedBot 76 + User-agent: QuillBot 77 + User-agent: quillbot.com 78 + User-agent: SBIntuitionsBot 79 + User-agent: Scrapy 80 + User-agent: SemrushBot 81 + User-agent: SemrushBot-BA 82 + User-agent: SemrushBot-CT 83 + User-agent: SemrushBot-OCOB 84 + User-agent: SemrushBot-SI 85 + User-agent: SemrushBot-SWA 86 + User-agent: Sidetrade indexer bot 87 + User-agent: TikTokSpider 88 + User-agent: Timpibot 89 + User-agent: VelenPublicWebCrawler 90 + User-agent: Webzio-Extended 91 + User-agent: wpbot 92 + User-agent: YandexAdditional 93 + User-agent: YandexAdditionalBot 94 + User-agent: YouBot 95 + Disallow: / 96 + `;