Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol. wisp.place

faster backfilling, fix wonkiness of redirects

Changed files
+86 -22
hosting-service
src
+4 -1
hosting-service/src/lib/backfill.ts
··· 2 2 import { fetchSiteRecord, getPdsForDid, downloadAndCacheSite, isCached } from './utils'; 3 3 import { logger } from './observability'; 4 4 import { markSiteAsBeingCached, unmarkSiteAsBeingCached } from './cache'; 5 + import { clearRedirectRulesCache } from '../server'; 5 6 6 7 export interface BackfillOptions { 7 8 skipExisting?: boolean; // Skip sites already in cache ··· 23 24 export async function backfillCache(options: BackfillOptions = {}): Promise<BackfillStats> { 24 25 const { 25 26 skipExisting = true, 26 - concurrency = 3, 27 + concurrency = 10, // Increased from 3 to 10 for better parallelization 27 28 maxSites, 28 29 } = options; 29 30 ··· 103 104 try { 104 105 // Download and cache site 105 106 await downloadAndCacheSite(site.did, site.rkey, siteData.record, pdsEndpoint, siteData.cid); 107 + // Clear redirect rules cache since the site was updated 108 + clearRedirectRulesCache(site.did, site.rkey); 106 109 stats.cached++; 107 110 processed++; 108 111 logger.info('Successfully cached site during backfill', { did: site.did, rkey: site.rkey });
+4
hosting-service/src/lib/firehose.ts
··· 11 11 import { Firehose } from '@atproto/sync' 12 12 import { IdResolver } from '@atproto/identity' 13 13 import { invalidateSiteCache, markSiteAsBeingCached, unmarkSiteAsBeingCached } from './cache' 14 + import { clearRedirectRulesCache } from '../server' 14 15 15 16 const CACHE_DIR = './cache/sites' 16 17 ··· 201 202 pdsEndpoint, 202 203 verifiedCid 203 204 ) 205 + 206 + // Clear redirect rules cache since the site was updated 207 + clearRedirectRulesCache(did, site) 204 208 205 209 // Acquire distributed lock only for database write to prevent duplicate writes 206 210 // Note: upsertSite will check cache-only mode internally and skip if needed
+63 -17
hosting-service/src/lib/redirects.ts
··· 24 24 status: number; 25 25 } 26 26 27 + // Maximum number of redirect rules to prevent DoS attacks 28 + const MAX_REDIRECT_RULES = 1000; 29 + 27 30 /** 28 31 * Parse a _redirects file into an array of redirect rules 29 32 */ ··· 34 37 for (let lineNum = 0; lineNum < lines.length; lineNum++) { 35 38 const lineRaw = lines[lineNum]; 36 39 if (!lineRaw) continue; 37 - 40 + 38 41 const line = lineRaw.trim(); 39 - 42 + 40 43 // Skip empty lines and comments 41 44 if (!line || line.startsWith('#')) { 42 45 continue; 46 + } 47 + 48 + // Enforce max rules limit 49 + if (rules.length >= MAX_REDIRECT_RULES) { 50 + console.warn(`Redirect rules limit reached (${MAX_REDIRECT_RULES}), ignoring remaining rules`); 51 + break; 43 52 } 44 53 45 54 try { ··· 218 227 } 219 228 220 229 /** 221 - * Match a request path against redirect rules 230 + * Match a request path against redirect rules with loop detection 222 231 */ 223 232 export function matchRedirectRule( 224 233 requestPath: string, ··· 227 236 queryParams?: Record<string, string>; 228 237 headers?: Record<string, string>; 229 238 cookies?: Record<string, string>; 230 - } 239 + }, 240 + visitedPaths: Set<string> = new Set() 231 241 ): RedirectMatch | null { 232 242 // Normalize path: ensure leading slash, remove trailing slash (except for root) 233 243 let normalizedPath = requestPath.startsWith('/') ? 
requestPath : `/${requestPath}`; 234 - 244 + 245 + // Detect redirect loops 246 + if (visitedPaths.has(normalizedPath)) { 247 + console.warn(`Redirect loop detected for path: ${normalizedPath}`); 248 + return null; 249 + } 250 + 251 + // Track this path to detect loops 252 + visitedPaths.add(normalizedPath); 253 + 254 + // Limit redirect chain depth to 10 255 + if (visitedPaths.size > 10) { 256 + console.warn(`Redirect chain too deep (>10) for path: ${normalizedPath}`); 257 + return null; 258 + } 259 + 235 260 for (const rule of rules) { 236 261 // Check query parameter conditions first (if any) 237 262 if (rule.queryParams) { ··· 239 264 if (!context?.queryParams) { 240 265 continue; 241 266 } 242 - 243 - const queryMatches = Object.entries(rule.queryParams).every(([key, value]) => { 267 + 268 + // Check that all required query params are present 269 + // The value in rule.queryParams is either a literal or a placeholder (:name) 270 + const queryMatches = Object.entries(rule.queryParams).every(([key, expectedValue]) => { 244 271 const actualValue = context.queryParams?.[key]; 245 - return actualValue !== undefined; 272 + 273 + // Query param must exist 274 + if (actualValue === undefined) { 275 + return false; 276 + } 277 + 278 + // If expected value is a placeholder (:name), any value is acceptable 279 + // If it's a literal, it must match exactly 280 + if (expectedValue && !expectedValue.startsWith(':')) { 281 + return actualValue === expectedValue; 282 + } 283 + 284 + return true; 246 285 }); 247 - 286 + 248 287 if (!queryMatches) { 249 288 continue; 250 289 } ··· 302 341 303 342 // Build the target path by replacing placeholders 304 343 let targetPath = rule.to; 305 - 306 - // Replace captured parameters 344 + 345 + // Replace captured parameters (with URL encoding) 307 346 if (rule.fromParams && match.length > 1) { 308 347 for (let i = 0; i < rule.fromParams.length; i++) { 309 348 const paramName = rule.fromParams[i]; 310 349 const paramValue = match[i + 1]; 
311 - 350 + 312 351 if (!paramName || !paramValue) continue; 313 - 352 + 353 + // URL encode captured values to prevent invalid URLs 354 + const encodedValue = encodeURIComponent(paramValue); 355 + 314 356 if (paramName === 'splat') { 315 - targetPath = targetPath.replace(':splat', paramValue); 357 + // For splats, preserve slashes by re-decoding them 358 + const splatValue = encodedValue.replace(/%2F/g, '/'); 359 + targetPath = targetPath.replace(':splat', splatValue); 316 360 } else { 317 - targetPath = targetPath.replace(`:${paramName}`, paramValue); 361 + targetPath = targetPath.replace(`:${paramName}`, encodedValue); 318 362 } 319 363 } 320 364 } 321 365 322 - // Handle query parameter replacements 366 + // Handle query parameter replacements (with URL encoding) 323 367 if (rule.queryParams && context?.queryParams) { 324 368 for (const [key, placeholder] of Object.entries(rule.queryParams)) { 325 369 const actualValue = context.queryParams[key]; 326 370 if (actualValue && placeholder && placeholder.startsWith(':')) { 327 371 const paramName = placeholder.slice(1); 328 372 if (paramName) { 329 - targetPath = targetPath.replace(`:${paramName}`, actualValue); 373 + // URL encode query parameter values 374 + const encodedValue = encodeURIComponent(actualValue); 375 + targetPath = targetPath.replace(`:${paramName}`, encodedValue); 330 376 } 331 377 } 332 378 }
+6
src/lib/wisp-utils.test.ts
··· 58 58 expect(shouldCompressFile('text/plain')).toBe(true) 59 59 }) 60 60 61 + test('should NOT compress _redirects file', () => { 62 + expect(shouldCompressFile('text/plain', '_redirects')).toBe(false) 63 + expect(shouldCompressFile('text/plain', 'folder/_redirects')).toBe(false) 64 + expect(shouldCompressFile('application/octet-stream', '_redirects')).toBe(false) 65 + }) 66 + 61 67 test('should NOT compress images', () => { 62 68 expect(shouldCompressFile('image/png')).toBe(false) 63 69 expect(shouldCompressFile('image/jpeg')).toBe(false)
+7 -2
src/lib/wisp-utils.ts
··· 32 32 } 33 33 34 34 /** 35 - * Determine if a file should be gzip compressed based on its MIME type 35 + * Determine if a file should be gzip compressed based on its MIME type and filename 36 36 */ 37 - export function shouldCompressFile(mimeType: string): boolean { 37 + export function shouldCompressFile(mimeType: string, fileName?: string): boolean { 38 + // Never compress _redirects file - it needs to be plain text for the hosting service 39 + if (fileName && (fileName.endsWith('/_redirects') || fileName === '_redirects')) { 40 + return false; 41 + } 42 + 38 43 // Compress text-based files and uncompressed audio formats 39 44 const compressibleTypes = [ 40 45 'text/html',
+2 -2
src/routes/wisp.ts
··· 191 191 const originalContent = Buffer.from(arrayBuffer); 192 192 const originalMimeType = file.type || 'application/octet-stream'; 193 193 194 - // Determine if file should be compressed 195 - const shouldCompress = shouldCompressFile(originalMimeType); 194 + // Determine if file should be compressed (pass filename to exclude _redirects) 195 + const shouldCompress = shouldCompressFile(originalMimeType, normalizedPath); 196 196 197 197 // Text files (HTML/CSS/JS) need base64 encoding to prevent PDS content sniffing 198 198 // Audio files just need compression without base64