+4
services/nginx/nginx.conf
+4
services/nginx/nginx.conf
···
25
25
listen 80;
26
26
server_name *.grain.social;
27
27
28
+
# Deny requests whose User-Agent header matches a known AI / SEO crawler.
# The check sits at server level (outside any `location`), so it applies to
# every path served by this server block.
#
# `~*` performs a case-insensitive, unanchored regex match, so each
# alternative matches anywhere inside the header value.
# NOTE(review): because of that, entries that differ only by case
# (meta-externalagent / Meta-ExternalAgent) or that extend another entry
# (Applebot / Applebot-Extended) are redundant, and short tokens such as
# "Operator" match any User-Agent containing that substring — confirm this
# breadth is intended.
# NOTE(review): the same names are listed in src/routes/robots.tsx; keep the
# two lists in sync when adding or removing a crawler.
if ($http_user_agent ~* "(AI2Bot|Ai2Bot\-Dolma|aiHitBot|Amazonbot|Andibot|anthropic\-ai|Applebot|Applebot\-Extended|bedrockbot|Brightbot\ 1\.0|Bytespider|CCBot|ChatGPT\-User|Claude\-SearchBot|Claude\-User|Claude\-Web|ClaudeBot|cohere\-ai|cohere\-training\-data\-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|EchoboxBot|FacebookBot|facebookexternalhit|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google\-CloudVertexBot|Google\-Extended|GoogleOther|GoogleOther\-Image|GoogleOther\-Video|GPTBot|iaskspider/2\.0|ICC\-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo\ Bot|meta\-externalagent|Meta\-ExternalAgent|meta\-externalfetcher|Meta\-ExternalFetcher|MistralAI\-User/1\.0|MyCentralAIScraperBot|NovaAct|OAI\-SearchBot|omgili|omgilibot|Operator|PanguBot|Panscient|panscient\.com|Perplexity\-User|PerplexityBot|PetalBot|PhindBot|Poseidon\ Research\ Crawler|QualifiedBot|QuillBot|quillbot\.com|SBIntuitionsBot|Scrapy|SemrushBot|SemrushBot\-BA|SemrushBot\-CT|SemrushBot\-OCOB|SemrushBot\-SI|SemrushBot\-SWA|Sidetrade\ indexer\ bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio\-Extended|wpbot|YandexAdditional|YandexAdditionalBot|YouBot)") {
    # Matching crawlers get a bare 403 and are never proxied upstream.
    return 403;
}
31
+
28
32
location /xrpc {
29
33
proxy_pass $pds;
30
34
proxy_set_header Host $host;
+2
src/main.tsx
+2
src/main.tsx
···
18
18
import { handler as onboardHandler } from "./routes/onboard.tsx";
19
19
import { handler as profileHandler } from "./routes/profile.tsx";
20
20
import { handler as recordHandler } from "./routes/record.ts";
21
+
import { handler as robotsHandler } from "./routes/robots.tsx";
21
22
import { handler as supportHandler } from "./routes/support.tsx";
22
23
import { handler as timelineHandler } from "./routes/timeline.tsx";
23
24
import { handler as uploadHandler } from "./routes/upload.tsx";
···
131
132
route("/actions/get-blob", ["GET"], actions.getBlob),
132
133
...comments,
133
134
route("/:did/:collection/:rkey", recordHandler),
135
+
route("/robots.txt", robotsHandler),
134
136
],
135
137
});
+96
src/routes/robots.tsx
+96
src/routes/robots.tsx
···
1
+
import { RouteHandler } from "@bigmoves/bff";
2
+
3
+
/**
 * Route handler that serves the crawler policy (registered for `/robots.txt`).
 *
 * The request, route params and context are intentionally unused: every call
 * answers with the module-level `robots` string and a `text/plain`
 * Content-Type header.
 *
 * @returns A `Response` whose body is the `robots` policy text.
 */
export const handler: RouteHandler = (
  _req,
  _params,
  _ctx,
) => {
  // `robots` is declared further down in this module; it is initialised by
  // the time any request reaches this handler.
  return new Response(robots, {
    headers: {
      "Content-Type": "text/plain",
    },
  });
};
14
+
15
+
/**
 * Body served for `/robots.txt`.
 *
 * A single rule group: every listed `User-agent` line shares the one
 * `Disallow: /` rule at the end, so each named crawler is asked to stay away
 * from the entire site. The text ends with a trailing newline.
 *
 * Keep one `User-agent:` directive per line and nothing else inside the
 * literal — any stray text here is sent verbatim to crawlers.
 *
 * NOTE(review): the same crawler names are matched by the User-Agent block in
 * services/nginx/nginx.conf; keep both lists in sync.
 */
const robots = `User-agent: AI2Bot
User-agent: Ai2Bot-Dolma
User-agent: aiHitBot
User-agent: Amazonbot
User-agent: Andibot
User-agent: anthropic-ai
User-agent: Applebot
User-agent: Applebot-Extended
User-agent: bedrockbot
User-agent: Brightbot 1.0
User-agent: Bytespider
User-agent: CCBot
User-agent: ChatGPT-User
User-agent: Claude-SearchBot
User-agent: Claude-User
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: cohere-ai
User-agent: cohere-training-data-crawler
User-agent: Cotoyogi
User-agent: Crawlspace
User-agent: Diffbot
User-agent: DuckAssistBot
User-agent: EchoboxBot
User-agent: FacebookBot
User-agent: facebookexternalhit
User-agent: Factset_spyderbot
User-agent: FirecrawlAgent
User-agent: FriendlyCrawler
User-agent: Google-CloudVertexBot
User-agent: Google-Extended
User-agent: GoogleOther
User-agent: GoogleOther-Image
User-agent: GoogleOther-Video
User-agent: GPTBot
User-agent: iaskspider/2.0
User-agent: ICC-Crawler
User-agent: ImagesiftBot
User-agent: img2dataset
User-agent: ISSCyberRiskCrawler
User-agent: Kangaroo Bot
User-agent: meta-externalagent
User-agent: Meta-ExternalAgent
User-agent: meta-externalfetcher
User-agent: Meta-ExternalFetcher
User-agent: MistralAI-User/1.0
User-agent: MyCentralAIScraperBot
User-agent: NovaAct
User-agent: OAI-SearchBot
User-agent: omgili
User-agent: omgilibot
User-agent: Operator
User-agent: PanguBot
User-agent: Panscient
User-agent: panscient.com
User-agent: Perplexity-User
User-agent: PerplexityBot
User-agent: PetalBot
User-agent: PhindBot
User-agent: Poseidon Research Crawler
User-agent: QualifiedBot
User-agent: QuillBot
User-agent: quillbot.com
User-agent: SBIntuitionsBot
User-agent: Scrapy
User-agent: SemrushBot
User-agent: SemrushBot-BA
User-agent: SemrushBot-CT
User-agent: SemrushBot-OCOB
User-agent: SemrushBot-SI
User-agent: SemrushBot-SWA
User-agent: Sidetrade indexer bot
User-agent: TikTokSpider
User-agent: Timpibot
User-agent: VelenPublicWebCrawler
User-agent: Webzio-Extended
User-agent: wpbot
User-agent: YandexAdditional
User-agent: YandexAdditionalBot
User-agent: YouBot
Disallow: /
`;