+4
-1
hosting-service/src/lib/backfill.ts
+4
-1
hosting-service/src/lib/backfill.ts
···
2
2
import { fetchSiteRecord, getPdsForDid, downloadAndCacheSite, isCached } from './utils';
3
3
import { logger } from './observability';
4
4
import { markSiteAsBeingCached, unmarkSiteAsBeingCached } from './cache';
5
+
import { clearRedirectRulesCache } from '../server';
5
6
6
7
export interface BackfillOptions {
7
8
skipExisting?: boolean; // Skip sites already in cache
···
23
24
export async function backfillCache(options: BackfillOptions = {}): Promise<BackfillStats> {
24
25
const {
25
26
skipExisting = true,
26
-
concurrency = 3,
27
+
concurrency = 10, // Increased from 3 to 10 for better parallelization
27
28
maxSites,
28
29
} = options;
29
30
···
103
104
try {
104
105
// Download and cache site
105
106
await downloadAndCacheSite(site.did, site.rkey, siteData.record, pdsEndpoint, siteData.cid);
107
+
// Clear redirect rules cache since the site was updated
108
+
clearRedirectRulesCache(site.did, site.rkey);
106
109
stats.cached++;
107
110
processed++;
108
111
logger.info('Successfully cached site during backfill', { did: site.did, rkey: site.rkey });
+4
hosting-service/src/lib/firehose.ts
+4
hosting-service/src/lib/firehose.ts
···
11
11
import { Firehose } from '@atproto/sync'
12
12
import { IdResolver } from '@atproto/identity'
13
13
import { invalidateSiteCache, markSiteAsBeingCached, unmarkSiteAsBeingCached } from './cache'
14
+
import { clearRedirectRulesCache } from '../server'
14
15
15
16
const CACHE_DIR = './cache/sites'
16
17
···
201
202
pdsEndpoint,
202
203
verifiedCid
203
204
)
205
+
206
+
// Clear redirect rules cache since the site was updated
207
+
clearRedirectRulesCache(did, site)
204
208
205
209
// Acquire distributed lock only for database write to prevent duplicate writes
206
210
// Note: upsertSite will check cache-only mode internally and skip if needed
+63
-17
hosting-service/src/lib/redirects.ts
+63
-17
hosting-service/src/lib/redirects.ts
···
24
24
status: number;
25
25
}
26
26
27
+
// Maximum number of redirect rules to prevent DoS attacks
28
+
const MAX_REDIRECT_RULES = 1000;
29
+
27
30
/**
28
31
* Parse a _redirects file into an array of redirect rules
29
32
*/
···
34
37
for (let lineNum = 0; lineNum < lines.length; lineNum++) {
35
38
const lineRaw = lines[lineNum];
36
39
if (!lineRaw) continue;
37
-
40
+
38
41
const line = lineRaw.trim();
39
-
42
+
40
43
// Skip empty lines and comments
41
44
if (!line || line.startsWith('#')) {
42
45
continue;
46
+
}
47
+
48
+
// Enforce max rules limit
49
+
if (rules.length >= MAX_REDIRECT_RULES) {
50
+
console.warn(`Redirect rules limit reached (${MAX_REDIRECT_RULES}), ignoring remaining rules`);
51
+
break;
43
52
}
44
53
45
54
try {
···
218
227
}
219
228
220
229
/**
221
-
* Match a request path against redirect rules
230
+
* Match a request path against redirect rules with loop detection
222
231
*/
223
232
export function matchRedirectRule(
224
233
requestPath: string,
···
227
236
queryParams?: Record<string, string>;
228
237
headers?: Record<string, string>;
229
238
cookies?: Record<string, string>;
230
-
}
239
+
},
240
+
visitedPaths: Set<string> = new Set()
231
241
): RedirectMatch | null {
232
242
// Normalize path: ensure leading slash, remove trailing slash (except for root)
233
243
let normalizedPath = requestPath.startsWith('/') ? requestPath : `/${requestPath}`;
234
-
244
+
245
+
// Detect redirect loops
246
+
if (visitedPaths.has(normalizedPath)) {
247
+
console.warn(`Redirect loop detected for path: ${normalizedPath}`);
248
+
return null;
249
+
}
250
+
251
+
// Track this path to detect loops
252
+
visitedPaths.add(normalizedPath);
253
+
254
+
// Limit redirect chain depth to 10
255
+
if (visitedPaths.size > 10) {
256
+
console.warn(`Redirect chain too deep (>10) for path: ${normalizedPath}`);
257
+
return null;
258
+
}
259
+
235
260
for (const rule of rules) {
236
261
// Check query parameter conditions first (if any)
237
262
if (rule.queryParams) {
···
239
264
if (!context?.queryParams) {
240
265
continue;
241
266
}
242
-
243
-
const queryMatches = Object.entries(rule.queryParams).every(([key, value]) => {
267
+
268
+
// Check that all required query params are present
269
+
// The value in rule.queryParams is either a literal or a placeholder (:name)
270
+
const queryMatches = Object.entries(rule.queryParams).every(([key, expectedValue]) => {
244
271
const actualValue = context.queryParams?.[key];
245
-
return actualValue !== undefined;
272
+
273
+
// Query param must exist
274
+
if (actualValue === undefined) {
275
+
return false;
276
+
}
277
+
278
+
// If expected value is a placeholder (:name), any value is acceptable
279
+
// If it's a literal, it must match exactly
280
+
if (expectedValue && !expectedValue.startsWith(':')) {
281
+
return actualValue === expectedValue;
282
+
}
283
+
284
+
return true;
246
285
});
247
-
286
+
248
287
if (!queryMatches) {
249
288
continue;
250
289
}
···
302
341
303
342
// Build the target path by replacing placeholders
304
343
let targetPath = rule.to;
305
-
306
-
// Replace captured parameters
344
+
345
+
// Replace captured parameters (with URL encoding)
307
346
if (rule.fromParams && match.length > 1) {
308
347
for (let i = 0; i < rule.fromParams.length; i++) {
309
348
const paramName = rule.fromParams[i];
310
349
const paramValue = match[i + 1];
311
-
350
+
312
351
if (!paramName || !paramValue) continue;
313
-
352
+
353
+
// URL encode captured values to prevent invalid URLs
354
+
const encodedValue = encodeURIComponent(paramValue);
355
+
314
356
if (paramName === 'splat') {
315
-
targetPath = targetPath.replace(':splat', paramValue);
357
+
// For splats, preserve slashes by re-decoding them
358
+
const splatValue = encodedValue.replace(/%2F/g, '/');
359
+
targetPath = targetPath.replace(':splat', splatValue);
316
360
} else {
317
-
targetPath = targetPath.replace(`:${paramName}`, paramValue);
361
+
targetPath = targetPath.replace(`:${paramName}`, encodedValue);
318
362
}
319
363
}
320
364
}
321
365
322
-
// Handle query parameter replacements
366
+
// Handle query parameter replacements (with URL encoding)
323
367
if (rule.queryParams && context?.queryParams) {
324
368
for (const [key, placeholder] of Object.entries(rule.queryParams)) {
325
369
const actualValue = context.queryParams[key];
326
370
if (actualValue && placeholder && placeholder.startsWith(':')) {
327
371
const paramName = placeholder.slice(1);
328
372
if (paramName) {
329
-
targetPath = targetPath.replace(`:${paramName}`, actualValue);
373
+
// URL encode query parameter values
374
+
const encodedValue = encodeURIComponent(actualValue);
375
+
targetPath = targetPath.replace(`:${paramName}`, encodedValue);
330
376
}
331
377
}
332
378
}
+6
src/lib/wisp-utils.test.ts
+6
src/lib/wisp-utils.test.ts
···
58
58
expect(shouldCompressFile('text/plain')).toBe(true)
59
59
})
60
60
61
+
test('should NOT compress _redirects file', () => {
62
+
expect(shouldCompressFile('text/plain', '_redirects')).toBe(false)
63
+
expect(shouldCompressFile('text/plain', 'folder/_redirects')).toBe(false)
64
+
expect(shouldCompressFile('application/octet-stream', '_redirects')).toBe(false)
65
+
})
66
+
61
67
test('should NOT compress images', () => {
62
68
expect(shouldCompressFile('image/png')).toBe(false)
63
69
expect(shouldCompressFile('image/jpeg')).toBe(false)
+7
-2
src/lib/wisp-utils.ts
+7
-2
src/lib/wisp-utils.ts
···
32
32
}
33
33
34
34
/**
35
-
* Determine if a file should be gzip compressed based on its MIME type
35
+
* Determine if a file should be gzip compressed based on its MIME type and filename
36
36
*/
37
-
export function shouldCompressFile(mimeType: string): boolean {
37
+
export function shouldCompressFile(mimeType: string, fileName?: string): boolean {
38
+
// Never compress _redirects file - it needs to be plain text for the hosting service
39
+
if (fileName && (fileName.endsWith('/_redirects') || fileName === '_redirects')) {
40
+
return false;
41
+
}
42
+
38
43
// Compress text-based files and uncompressed audio formats
39
44
const compressibleTypes = [
40
45
'text/html',
+2
-2
src/routes/wisp.ts
+2
-2
src/routes/wisp.ts
···
191
191
const originalContent = Buffer.from(arrayBuffer);
192
192
const originalMimeType = file.type || 'application/octet-stream';
193
193
194
-
// Determine if file should be compressed
195
-
const shouldCompress = shouldCompressFile(originalMimeType);
194
+
// Determine if file should be compressed (pass filename to exclude _redirects)
195
+
const shouldCompress = shouldCompressFile(originalMimeType, normalizedPath);
196
196
197
197
// Text files (HTML/CSS/JS) need base64 encoding to prevent PDS content sniffing
198
198
// Audio files just need compression without base64