Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol. wisp.place

update dockerfiles, solidify html path rewriting

+8
.dockerignore
··· 9 9 *.log 10 10 .vscode 11 11 .idea 12 + server 13 + .prettierrc 14 + testDeploy 15 + .tangled 16 + .crush 17 + .claude 18 + server 19 + hosting-service
+10 -6
Dockerfile
··· 15 15 COPY public ./public 16 16 17 17 # Build the application (if needed) 18 - # RUN bun run build 18 + RUN bun build \ 19 + --compile \ 20 + --minify \ 21 + --outfile server \ 22 + src/index.ts 23 + 24 + FROM scratch AS runtime 25 + WORKDIR /app 26 + COPY --from=base /app/server /app/server 19 27 20 28 # Set environment variables (can be overridden at runtime) 21 29 ENV PORT=3000 ··· 24 32 # Expose the application port 25 33 EXPOSE 3000 26 34 27 - # Health check 28 - HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ 29 - CMD bun -e "fetch('http://localhost:3000/health').then(r => r.ok ? process.exit(0) : process.exit(1)).catch(() => process.exit(1))" 30 - 31 35 # Start the application 32 - CMD ["bun", "src/index.ts"] 36 + CMD ["./server"]
+259 -219
hosting-service/src/lib/firehose.ts
··· 1 - import { existsSync, rmSync } from 'fs'; 2 - import { getPdsForDid, downloadAndCacheSite, extractBlobCid, fetchSiteRecord } from './utils'; 3 - import { upsertSite, tryAcquireLock, releaseLock } from './db'; 4 - import { safeFetch } from './safe-fetch'; 5 - import { isRecord, validateRecord } from '../lexicon/types/place/wisp/fs'; 6 - import { Firehose } from '@atproto/sync'; 7 - import { IdResolver } from '@atproto/identity'; 1 + import { existsSync, rmSync } from 'fs' 2 + import { 3 + getPdsForDid, 4 + downloadAndCacheSite, 5 + extractBlobCid, 6 + fetchSiteRecord 7 + } from './utils' 8 + import { upsertSite, tryAcquireLock, releaseLock } from './db' 9 + import { safeFetch } from './safe-fetch' 10 + import { isRecord, validateRecord } from '../lexicon/types/place/wisp/fs' 11 + import { Firehose } from '@atproto/sync' 12 + import { IdResolver } from '@atproto/identity' 8 13 9 - const CACHE_DIR = './cache/sites'; 14 + const CACHE_DIR = './cache/sites' 10 15 11 16 export class FirehoseWorker { 12 - private firehose: Firehose | null = null; 13 - private idResolver: IdResolver; 14 - private isShuttingDown = false; 15 - private lastEventTime = Date.now(); 17 + private firehose: Firehose | null = null 18 + private idResolver: IdResolver 19 + private isShuttingDown = false 20 + private lastEventTime = Date.now() 16 21 17 - constructor( 18 - private logger?: (msg: string, data?: Record<string, unknown>) => void, 19 - ) { 20 - this.idResolver = new IdResolver(); 21 - } 22 + constructor( 23 + private logger?: (msg: string, data?: Record<string, unknown>) => void 24 + ) { 25 + this.idResolver = new IdResolver() 26 + } 22 27 23 - private log(msg: string, data?: Record<string, unknown>) { 24 - const log = this.logger || console.log; 25 - log(`[FirehoseWorker] ${msg}`, data || {}); 26 - } 28 + private log(msg: string, data?: Record<string, unknown>) { 29 + const log = this.logger || console.log 30 + log(`[FirehoseWorker] ${msg}`, data || {}) 31 + } 27 32 28 - start() { 
29 - this.log('Starting firehose worker'); 30 - this.connect(); 31 - } 33 + start() { 34 + this.log('Starting firehose worker') 35 + this.connect() 36 + } 32 37 33 - stop() { 34 - this.log('Stopping firehose worker'); 35 - this.isShuttingDown = true; 38 + stop() { 39 + this.log('Stopping firehose worker') 40 + this.isShuttingDown = true 36 41 37 - if (this.firehose) { 38 - this.firehose.destroy(); 39 - this.firehose = null; 40 - } 41 - } 42 + if (this.firehose) { 43 + this.firehose.destroy() 44 + this.firehose = null 45 + } 46 + } 42 47 43 - private connect() { 44 - if (this.isShuttingDown) return; 48 + private connect() { 49 + if (this.isShuttingDown) return 45 50 46 - this.log('Connecting to AT Protocol firehose'); 51 + this.log('Connecting to AT Protocol firehose') 47 52 48 - this.firehose = new Firehose({ 49 - idResolver: this.idResolver, 50 - service: 'wss://bsky.network', 51 - filterCollections: ['place.wisp.fs'], 52 - handleEvent: async (evt: any) => { 53 - this.lastEventTime = Date.now(); 53 + this.firehose = new Firehose({ 54 + idResolver: this.idResolver, 55 + service: 'wss://bsky.network', 56 + filterCollections: ['place.wisp.fs'], 57 + handleEvent: async (evt: any) => { 58 + this.lastEventTime = Date.now() 54 59 55 - // Watch for write events 56 - if (evt.event === 'create' || evt.event === 'update') { 57 - const record = evt.record; 60 + // Watch for write events 61 + if (evt.event === 'create' || evt.event === 'update') { 62 + const record = evt.record 58 63 59 - // If the write is a valid place.wisp.fs record 60 - if ( 61 - evt.collection === 'place.wisp.fs' && 62 - isRecord(record) && 63 - validateRecord(record).success 64 - ) { 65 - this.log('Received place.wisp.fs event', { 66 - did: evt.did, 67 - event: evt.event, 68 - rkey: evt.rkey, 69 - }); 64 + // If the write is a valid place.wisp.fs record 65 + if ( 66 + evt.collection === 'place.wisp.fs' && 67 + isRecord(record) && 68 + validateRecord(record).success 69 + ) { 70 + this.log('Received 
place.wisp.fs event', { 71 + did: evt.did, 72 + event: evt.event, 73 + rkey: evt.rkey 74 + }) 70 75 71 - try { 72 - await this.handleCreateOrUpdate(evt.did, evt.rkey, record, evt.cid?.toString()); 73 - } catch (err) { 74 - this.log('Error handling event', { 75 - did: evt.did, 76 - event: evt.event, 77 - rkey: evt.rkey, 78 - error: err instanceof Error ? err.message : String(err), 79 - }); 80 - } 81 - } 82 - } else if (evt.event === 'delete' && evt.collection === 'place.wisp.fs') { 83 - this.log('Received delete event', { 84 - did: evt.did, 85 - rkey: evt.rkey, 86 - }); 76 + try { 77 + await this.handleCreateOrUpdate( 78 + evt.did, 79 + evt.rkey, 80 + record, 81 + evt.cid?.toString() 82 + ) 83 + } catch (err) { 84 + this.log('Error handling event', { 85 + did: evt.did, 86 + event: evt.event, 87 + rkey: evt.rkey, 88 + error: 89 + err instanceof Error 90 + ? err.message 91 + : String(err) 92 + }) 93 + } 94 + } 95 + } else if ( 96 + evt.event === 'delete' && 97 + evt.collection === 'place.wisp.fs' 98 + ) { 99 + this.log('Received delete event', { 100 + did: evt.did, 101 + rkey: evt.rkey 102 + }) 87 103 88 - try { 89 - await this.handleDelete(evt.did, evt.rkey); 90 - } catch (err) { 91 - this.log('Error handling delete', { 92 - did: evt.did, 93 - rkey: evt.rkey, 94 - error: err instanceof Error ? err.message : String(err), 95 - }); 96 - } 97 - } 98 - }, 99 - onError: (err: any) => { 100 - this.log('Firehose error', { 101 - error: err instanceof Error ? err.message : String(err), 102 - stack: err instanceof Error ? err.stack : undefined, 103 - fullError: err, 104 - }); 105 - console.error('Full firehose error:', err); 106 - }, 107 - }); 104 + try { 105 + await this.handleDelete(evt.did, evt.rkey) 106 + } catch (err) { 107 + this.log('Error handling delete', { 108 + did: evt.did, 109 + rkey: evt.rkey, 110 + error: 111 + err instanceof Error ? 
err.message : String(err) 112 + }) 113 + } 114 + } 115 + }, 116 + onError: (err: any) => { 117 + this.log('Firehose error', { 118 + error: err instanceof Error ? err.message : String(err), 119 + stack: err instanceof Error ? err.stack : undefined, 120 + fullError: err 121 + }) 122 + console.error('Full firehose error:', err) 123 + } 124 + }) 108 125 109 - this.firehose.start(); 110 - this.log('Firehose started'); 111 - } 126 + this.firehose.start() 127 + this.log('Firehose started') 128 + } 112 129 113 - private async handleCreateOrUpdate(did: string, site: string, record: any, eventCid?: string) { 114 - this.log('Processing create/update', { did, site }); 130 + private async handleCreateOrUpdate( 131 + did: string, 132 + site: string, 133 + record: any, 134 + eventCid?: string 135 + ) { 136 + this.log('Processing create/update', { did, site }) 115 137 116 - // Record is already validated in handleEvent 117 - const fsRecord = record; 138 + // Record is already validated in handleEvent 139 + const fsRecord = record 118 140 119 - const pdsEndpoint = await getPdsForDid(did); 120 - if (!pdsEndpoint) { 121 - this.log('Could not resolve PDS for DID', { did }); 122 - return; 123 - } 141 + const pdsEndpoint = await getPdsForDid(did) 142 + if (!pdsEndpoint) { 143 + this.log('Could not resolve PDS for DID', { did }) 144 + return 145 + } 124 146 125 - this.log('Resolved PDS', { did, pdsEndpoint }); 147 + this.log('Resolved PDS', { did, pdsEndpoint }) 126 148 127 - // Verify record exists on PDS and fetch its CID 128 - let verifiedCid: string; 129 - try { 130 - const result = await fetchSiteRecord(did, site); 149 + // Verify record exists on PDS and fetch its CID 150 + let verifiedCid: string 151 + try { 152 + const result = await fetchSiteRecord(did, site) 131 153 132 - if (!result) { 133 - this.log('Record not found on PDS, skipping cache', { did, site }); 134 - return; 135 - } 154 + if (!result) { 155 + this.log('Record not found on PDS, skipping cache', { 156 + did, 157 + 
site 158 + }) 159 + return 160 + } 136 161 137 - verifiedCid = result.cid; 162 + verifiedCid = result.cid 138 163 139 - // Verify event CID matches PDS CID (prevent cache poisoning) 140 - if (eventCid && eventCid !== verifiedCid) { 141 - this.log('CID mismatch detected - potential spoofed event', { 142 - did, 143 - site, 144 - eventCid, 145 - verifiedCid 146 - }); 147 - return; 148 - } 164 + // Verify event CID matches PDS CID (prevent cache poisoning) 165 + if (eventCid && eventCid !== verifiedCid) { 166 + this.log('CID mismatch detected - potential spoofed event', { 167 + did, 168 + site, 169 + eventCid, 170 + verifiedCid 171 + }) 172 + return 173 + } 149 174 150 - this.log('Record verified on PDS', { did, site, cid: verifiedCid }); 151 - } catch (err) { 152 - this.log('Failed to verify record on PDS', { 153 - did, 154 - site, 155 - error: err instanceof Error ? err.message : String(err), 156 - }); 157 - return; 158 - } 175 + this.log('Record verified on PDS', { did, site, cid: verifiedCid }) 176 + } catch (err) { 177 + this.log('Failed to verify record on PDS', { 178 + did, 179 + site, 180 + error: err instanceof Error ? 
err.message : String(err) 181 + }) 182 + return 183 + } 159 184 160 - // Cache the record with verified CID (uses atomic swap internally) 161 - // All instances cache locally for edge serving 162 - await downloadAndCacheSite(did, site, fsRecord, pdsEndpoint, verifiedCid); 185 + // Cache the record with verified CID (uses atomic swap internally) 186 + // All instances cache locally for edge serving 187 + await downloadAndCacheSite( 188 + did, 189 + site, 190 + fsRecord, 191 + pdsEndpoint, 192 + verifiedCid 193 + ) 163 194 164 - // Acquire distributed lock only for database write to prevent duplicate writes 165 - const lockKey = `db:upsert:${did}:${site}`; 166 - const lockAcquired = await tryAcquireLock(lockKey); 195 + // Acquire distributed lock only for database write to prevent duplicate writes 196 + const lockKey = `db:upsert:${did}:${site}` 197 + const lockAcquired = await tryAcquireLock(lockKey) 167 198 168 - if (!lockAcquired) { 169 - this.log('Another instance is writing to DB, skipping upsert', { did, site }); 170 - this.log('Successfully processed create/update (cached locally)', { did, site }); 171 - return; 172 - } 199 + if (!lockAcquired) { 200 + this.log('Another instance is writing to DB, skipping upsert', { 201 + did, 202 + site 203 + }) 204 + this.log('Successfully processed create/update (cached locally)', { 205 + did, 206 + site 207 + }) 208 + return 209 + } 173 210 174 - try { 175 - // Upsert site to database (only one instance does this) 176 - await upsertSite(did, site, fsRecord.site); 177 - this.log('Successfully processed create/update (cached + DB updated)', { did, site }); 178 - } finally { 179 - // Always release lock, even if DB write fails 180 - await releaseLock(lockKey); 181 - } 182 - } 211 + try { 212 + // Upsert site to database (only one instance does this) 213 + await upsertSite(did, site, fsRecord.site) 214 + this.log( 215 + 'Successfully processed create/update (cached + DB updated)', 216 + { did, site } 217 + ) 218 + } finally { 
219 + // Always release lock, even if DB write fails 220 + await releaseLock(lockKey) 221 + } 222 + } 183 223 184 - private async handleDelete(did: string, site: string) { 185 - this.log('Processing delete', { did, site }); 224 + private async handleDelete(did: string, site: string) { 225 + this.log('Processing delete', { did, site }) 186 226 187 - // All instances should delete their local cache (no lock needed) 188 - const pdsEndpoint = await getPdsForDid(did); 189 - if (!pdsEndpoint) { 190 - this.log('Could not resolve PDS for DID', { did }); 191 - return; 192 - } 227 + // All instances should delete their local cache (no lock needed) 228 + const pdsEndpoint = await getPdsForDid(did) 229 + if (!pdsEndpoint) { 230 + this.log('Could not resolve PDS for DID', { did }) 231 + return 232 + } 193 233 194 - // Verify record is actually deleted from PDS 195 - try { 196 - const recordUrl = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.fs&rkey=${encodeURIComponent(site)}`; 197 - const recordRes = await safeFetch(recordUrl); 234 + // Verify record is actually deleted from PDS 235 + try { 236 + const recordUrl = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.fs&rkey=${encodeURIComponent(site)}` 237 + const recordRes = await safeFetch(recordUrl) 198 238 199 - if (recordRes.ok) { 200 - this.log('Record still exists on PDS, not deleting cache', { 201 - did, 202 - site, 203 - }); 204 - return; 205 - } 239 + if (recordRes.ok) { 240 + this.log('Record still exists on PDS, not deleting cache', { 241 + did, 242 + site 243 + }) 244 + return 245 + } 206 246 207 - this.log('Verified record is deleted from PDS', { 208 - did, 209 - site, 210 - status: recordRes.status, 211 - }); 212 - } catch (err) { 213 - this.log('Error verifying deletion on PDS', { 214 - did, 215 - site, 216 - error: err instanceof Error ? 
err.message : String(err), 217 - }); 218 - } 247 + this.log('Verified record is deleted from PDS', { 248 + did, 249 + site, 250 + status: recordRes.status 251 + }) 252 + } catch (err) { 253 + this.log('Error verifying deletion on PDS', { 254 + did, 255 + site, 256 + error: err instanceof Error ? err.message : String(err) 257 + }) 258 + } 219 259 220 - // Delete cache 221 - this.deleteCache(did, site); 260 + // Delete cache 261 + this.deleteCache(did, site) 222 262 223 - this.log('Successfully processed delete', { did, site }); 224 - } 263 + this.log('Successfully processed delete', { did, site }) 264 + } 225 265 226 - private deleteCache(did: string, site: string) { 227 - const cacheDir = `${CACHE_DIR}/${did}/${site}`; 266 + private deleteCache(did: string, site: string) { 267 + const cacheDir = `${CACHE_DIR}/${did}/${site}` 228 268 229 - if (!existsSync(cacheDir)) { 230 - this.log('Cache directory does not exist, nothing to delete', { 231 - did, 232 - site, 233 - }); 234 - return; 235 - } 269 + if (!existsSync(cacheDir)) { 270 + this.log('Cache directory does not exist, nothing to delete', { 271 + did, 272 + site 273 + }) 274 + return 275 + } 236 276 237 - try { 238 - rmSync(cacheDir, { recursive: true, force: true }); 239 - this.log('Cache deleted', { did, site, path: cacheDir }); 240 - } catch (err) { 241 - this.log('Failed to delete cache', { 242 - did, 243 - site, 244 - path: cacheDir, 245 - error: err instanceof Error ? err.message : String(err), 246 - }); 247 - } 248 - } 277 + try { 278 + rmSync(cacheDir, { recursive: true, force: true }) 279 + this.log('Cache deleted', { did, site, path: cacheDir }) 280 + } catch (err) { 281 + this.log('Failed to delete cache', { 282 + did, 283 + site, 284 + path: cacheDir, 285 + error: err instanceof Error ? 
err.message : String(err) 286 + }) 287 + } 288 + } 249 289 250 - getHealth() { 251 - const isConnected = this.firehose !== null; 252 - const timeSinceLastEvent = Date.now() - this.lastEventTime; 290 + getHealth() { 291 + const isConnected = this.firehose !== null 292 + const timeSinceLastEvent = Date.now() - this.lastEventTime 253 293 254 - return { 255 - connected: isConnected, 256 - lastEventTime: this.lastEventTime, 257 - timeSinceLastEvent, 258 - healthy: isConnected && timeSinceLastEvent < 300000, // 5 minutes 259 - }; 260 - } 294 + return { 295 + connected: isConnected, 296 + lastEventTime: this.lastEventTime, 297 + timeSinceLastEvent, 298 + healthy: isConnected && timeSinceLastEvent < 300000 // 5 minutes 299 + } 300 + } 261 301 }
+457
hosting-service/src/lib/html-rewriter.test.ts
··· 1 + import { describe, test, expect } from 'bun:test' 2 + import { rewriteHtmlPaths, isHtmlContent } from './html-rewriter' 3 + 4 + describe('rewriteHtmlPaths', () => { 5 + const basePath = '/identifier/site/' 6 + 7 + describe('absolute paths', () => { 8 + test('rewrites absolute paths with leading slash', () => { 9 + const html = '<img src="/image.png">' 10 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 11 + expect(result).toBe('<img src="/identifier/site/image.png">') 12 + }) 13 + 14 + test('rewrites nested absolute paths', () => { 15 + const html = '<link href="/css/style.css">' 16 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 17 + expect(result).toBe('<link href="/identifier/site/css/style.css">') 18 + }) 19 + }) 20 + 21 + describe('relative paths from root document', () => { 22 + test('rewrites relative paths with ./ prefix', () => { 23 + const html = '<img src="./image.png">' 24 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 25 + expect(result).toBe('<img src="/identifier/site/image.png">') 26 + }) 27 + 28 + test('rewrites relative paths without prefix', () => { 29 + const html = '<img src="image.png">' 30 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 31 + expect(result).toBe('<img src="/identifier/site/image.png">') 32 + }) 33 + 34 + test('rewrites relative paths with ../ (should stay at root)', () => { 35 + const html = '<img src="../image.png">' 36 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 37 + expect(result).toBe('<img src="/identifier/site/image.png">') 38 + }) 39 + }) 40 + 41 + describe('relative paths from nested documents', () => { 42 + test('rewrites relative path from nested document', () => { 43 + const html = '<img src="./photo.jpg">' 44 + const result = rewriteHtmlPaths( 45 + html, 46 + basePath, 47 + 'folder1/folder2/index.html' 48 + ) 49 + expect(result).toBe( 50 + '<img src="/identifier/site/folder1/folder2/photo.jpg">' 51 + ) 52 + }) 53 + 54 + 
test('rewrites plain filename from nested document', () => { 55 + const html = '<script src="app.js"></script>' 56 + const result = rewriteHtmlPaths( 57 + html, 58 + basePath, 59 + 'folder1/folder2/index.html' 60 + ) 61 + expect(result).toBe( 62 + '<script src="/identifier/site/folder1/folder2/app.js"></script>' 63 + ) 64 + }) 65 + 66 + test('rewrites ../ to go up one level', () => { 67 + const html = '<img src="../image.png">' 68 + const result = rewriteHtmlPaths( 69 + html, 70 + basePath, 71 + 'folder1/folder2/folder3/index.html' 72 + ) 73 + expect(result).toBe( 74 + '<img src="/identifier/site/folder1/folder2/image.png">' 75 + ) 76 + }) 77 + 78 + test('rewrites multiple ../ to go up multiple levels', () => { 79 + const html = '<link href="../../css/style.css">' 80 + const result = rewriteHtmlPaths( 81 + html, 82 + basePath, 83 + 'folder1/folder2/folder3/index.html' 84 + ) 85 + expect(result).toBe( 86 + '<link href="/identifier/site/folder1/css/style.css">' 87 + ) 88 + }) 89 + 90 + test('rewrites ../ with additional path segments', () => { 91 + const html = '<img src="../assets/logo.png">' 92 + const result = rewriteHtmlPaths( 93 + html, 94 + basePath, 95 + 'pages/about/index.html' 96 + ) 97 + expect(result).toBe( 98 + '<img src="/identifier/site/pages/assets/logo.png">' 99 + ) 100 + }) 101 + 102 + test('handles complex nested relative paths', () => { 103 + const html = '<script src="../../lib/vendor/jquery.js"></script>' 104 + const result = rewriteHtmlPaths( 105 + html, 106 + basePath, 107 + 'pages/blog/post/index.html' 108 + ) 109 + expect(result).toBe( 110 + '<script src="/identifier/site/pages/lib/vendor/jquery.js"></script>' 111 + ) 112 + }) 113 + 114 + test('handles ../ going past root (stays at root)', () => { 115 + const html = '<img src="../../../image.png">' 116 + const result = rewriteHtmlPaths(html, basePath, 'folder1/index.html') 117 + expect(result).toBe('<img src="/identifier/site/image.png">') 118 + }) 119 + }) 120 + 121 + describe('external URLs 
and special schemes', () => { 122 + test('does not rewrite http URLs', () => { 123 + const html = '<img src="http://example.com/image.png">' 124 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 125 + expect(result).toBe('<img src="http://example.com/image.png">') 126 + }) 127 + 128 + test('does not rewrite https URLs', () => { 129 + const html = '<link href="https://cdn.example.com/style.css">' 130 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 131 + expect(result).toBe( 132 + '<link href="https://cdn.example.com/style.css">' 133 + ) 134 + }) 135 + 136 + test('does not rewrite protocol-relative URLs', () => { 137 + const html = '<script src="//cdn.example.com/script.js"></script>' 138 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 139 + expect(result).toBe( 140 + '<script src="//cdn.example.com/script.js"></script>' 141 + ) 142 + }) 143 + 144 + test('does not rewrite data URIs', () => { 145 + const html = 146 + '<img src="">' 147 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 148 + expect(result).toBe( 149 + '<img src="">' 150 + ) 151 + }) 152 + 153 + test('does not rewrite mailto links', () => { 154 + const html = '<a href="mailto:test@example.com">Email</a>' 155 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 156 + expect(result).toBe('<a href="mailto:test@example.com">Email</a>') 157 + }) 158 + 159 + test('does not rewrite tel links', () => { 160 + const html = '<a href="tel:+1234567890">Call</a>' 161 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 162 + expect(result).toBe('<a href="tel:+1234567890">Call</a>') 163 + }) 164 + }) 165 + 166 + describe('different HTML attributes', () => { 167 + test('rewrites src attribute', () => { 168 + const html = '<img src="/image.png">' 169 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 170 + expect(result).toBe('<img src="/identifier/site/image.png">') 171 + }) 172 + 173 + test('rewrites href attribute', () => { 
174 + const html = '<a href="/page.html">Link</a>' 175 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 176 + expect(result).toBe('<a href="/identifier/site/page.html">Link</a>') 177 + }) 178 + 179 + test('rewrites action attribute', () => { 180 + const html = '<form action="/submit"></form>' 181 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 182 + expect(result).toBe('<form action="/identifier/site/submit"></form>') 183 + }) 184 + 185 + test('rewrites data attribute', () => { 186 + const html = '<object data="/document.pdf"></object>' 187 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 188 + expect(result).toBe( 189 + '<object data="/identifier/site/document.pdf"></object>' 190 + ) 191 + }) 192 + 193 + test('rewrites poster attribute', () => { 194 + const html = '<video poster="/thumbnail.jpg"></video>' 195 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 196 + expect(result).toBe( 197 + '<video poster="/identifier/site/thumbnail.jpg"></video>' 198 + ) 199 + }) 200 + 201 + test('rewrites srcset attribute with single URL', () => { 202 + const html = '<img srcset="/image.png 1x">' 203 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 204 + expect(result).toBe( 205 + '<img srcset="/identifier/site/image.png 1x">' 206 + ) 207 + }) 208 + 209 + test('rewrites srcset attribute with multiple URLs', () => { 210 + const html = '<img srcset="/image-1x.png 1x, /image-2x.png 2x">' 211 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 212 + expect(result).toBe( 213 + '<img srcset="/identifier/site/image-1x.png 1x, /identifier/site/image-2x.png 2x">' 214 + ) 215 + }) 216 + 217 + test('rewrites srcset with width descriptors', () => { 218 + const html = '<img srcset="/small.jpg 320w, /large.jpg 1024w">' 219 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 220 + expect(result).toBe( 221 + '<img srcset="/identifier/site/small.jpg 320w, /identifier/site/large.jpg 1024w">' 222 + ) 
223 + }) 224 + 225 + test('rewrites srcset with relative paths from nested document', () => { 226 + const html = '<img srcset="../img1.png 1x, ../img2.png 2x">' 227 + const result = rewriteHtmlPaths( 228 + html, 229 + basePath, 230 + 'folder1/folder2/index.html' 231 + ) 232 + expect(result).toBe( 233 + '<img srcset="/identifier/site/folder1/img1.png 1x, /identifier/site/folder1/img2.png 2x">' 234 + ) 235 + }) 236 + }) 237 + 238 + describe('quote handling', () => { 239 + test('handles double quotes', () => { 240 + const html = '<img src="/image.png">' 241 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 242 + expect(result).toBe('<img src="/identifier/site/image.png">') 243 + }) 244 + 245 + test('handles single quotes', () => { 246 + const html = "<img src='/image.png'>" 247 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 248 + expect(result).toBe("<img src='/identifier/site/image.png'>") 249 + }) 250 + 251 + test('handles mixed quotes in same document', () => { 252 + const html = '<img src="/img1.png"><link href=\'/style.css\'>' 253 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 254 + expect(result).toBe( 255 + '<img src="/identifier/site/img1.png"><link href=\'/identifier/site/style.css\'>' 256 + ) 257 + }) 258 + }) 259 + 260 + describe('multiple rewrites in same document', () => { 261 + test('rewrites multiple attributes in complex HTML', () => { 262 + const html = ` 263 + <!DOCTYPE html> 264 + <html> 265 + <head> 266 + <link href="/css/style.css" rel="stylesheet"> 267 + <script src="/js/app.js"></script> 268 + </head> 269 + <body> 270 + <img src="/images/logo.png" alt="Logo"> 271 + <a href="/about.html">About</a> 272 + <form action="/submit"> 273 + <button type="submit">Submit</button> 274 + </form> 275 + </body> 276 + </html> 277 + ` 278 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 279 + expect(result).toContain('href="/identifier/site/css/style.css"') 280 + 
expect(result).toContain('src="/identifier/site/js/app.js"') 281 + expect(result).toContain('src="/identifier/site/images/logo.png"') 282 + expect(result).toContain('href="/identifier/site/about.html"') 283 + expect(result).toContain('action="/identifier/site/submit"') 284 + }) 285 + 286 + test('handles mix of relative and absolute paths', () => { 287 + const html = ` 288 + <img src="/abs/image.png"> 289 + <img src="./rel/image.png"> 290 + <img src="../parent/image.png"> 291 + <img src="https://external.com/image.png"> 292 + ` 293 + const result = rewriteHtmlPaths( 294 + html, 295 + basePath, 296 + 'folder1/folder2/page.html' 297 + ) 298 + expect(result).toContain('src="/identifier/site/abs/image.png"') 299 + expect(result).toContain( 300 + 'src="/identifier/site/folder1/folder2/rel/image.png"' 301 + ) 302 + expect(result).toContain( 303 + 'src="/identifier/site/folder1/parent/image.png"' 304 + ) 305 + expect(result).toContain('src="https://external.com/image.png"') 306 + }) 307 + }) 308 + 309 + describe('edge cases', () => { 310 + test('handles empty src attribute', () => { 311 + const html = '<img src="">' 312 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 313 + expect(result).toBe('<img src="">') 314 + }) 315 + 316 + test('handles basePath without trailing slash', () => { 317 + const html = '<img src="/image.png">' 318 + const result = rewriteHtmlPaths(html, '/identifier/site', 'index.html') 319 + expect(result).toBe('<img src="/identifier/site/image.png">') 320 + }) 321 + 322 + test('handles basePath with trailing slash', () => { 323 + const html = '<img src="/image.png">' 324 + const result = rewriteHtmlPaths( 325 + html, 326 + '/identifier/site/', 327 + 'index.html' 328 + ) 329 + expect(result).toBe('<img src="/identifier/site/image.png">') 330 + }) 331 + 332 + test('handles whitespace around equals sign', () => { 333 + const html = '<img src = "/image.png">' 334 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 335 + 
expect(result).toBe('<img src="/identifier/site/image.png">') 336 + }) 337 + 338 + test('preserves query strings in URLs', () => { 339 + const html = '<img src="/image.png?v=123">' 340 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 341 + expect(result).toBe('<img src="/identifier/site/image.png?v=123">') 342 + }) 343 + 344 + test('preserves hash fragments in URLs', () => { 345 + const html = '<a href="/page.html#section">Link</a>' 346 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 347 + expect(result).toBe( 348 + '<a href="/identifier/site/page.html#section">Link</a>' 349 + ) 350 + }) 351 + 352 + test('handles paths with special characters', () => { 353 + const html = '<img src="/folder-name/file_name.png">' 354 + const result = rewriteHtmlPaths(html, basePath, 'index.html') 355 + expect(result).toBe( 356 + '<img src="/identifier/site/folder-name/file_name.png">' 357 + ) 358 + }) 359 + }) 360 + 361 + describe('real-world scenario', () => { 362 + test('handles the example from the bug report', () => { 363 + // HTML file at: /folder1/folder2/folder3/index.html 364 + // Image at: /folder1/folder2/img.png 365 + // Reference: src="../img.png" 366 + const html = '<img src="../img.png">' 367 + const result = rewriteHtmlPaths( 368 + html, 369 + basePath, 370 + 'folder1/folder2/folder3/index.html' 371 + ) 372 + expect(result).toBe( 373 + '<img src="/identifier/site/folder1/folder2/img.png">' 374 + ) 375 + }) 376 + 377 + test('handles deeply nested static site structure', () => { 378 + // A typical static site with nested pages and shared assets 379 + const html = ` 380 + <!DOCTYPE html> 381 + <html> 382 + <head> 383 + <link href="../../css/style.css" rel="stylesheet"> 384 + <link href="../../css/theme.css" rel="stylesheet"> 385 + <script src="../../js/main.js"></script> 386 + </head> 387 + <body> 388 + <img src="../../images/logo.png" alt="Logo"> 389 + <img src="./post-image.jpg" alt="Post"> 390 + <a href="../index.html">Back to Blog</a> 
391 + <a href="../../index.html">Home</a> 392 + </body> 393 + </html> 394 + ` 395 + const result = rewriteHtmlPaths( 396 + html, 397 + basePath, 398 + 'blog/posts/my-post.html' 399 + ) 400 + 401 + // Assets two levels up 402 + expect(result).toContain('href="/identifier/site/css/style.css"') 403 + expect(result).toContain('href="/identifier/site/css/theme.css"') 404 + expect(result).toContain('src="/identifier/site/js/main.js"') 405 + expect(result).toContain('src="/identifier/site/images/logo.png"') 406 + 407 + // Same directory 408 + expect(result).toContain( 409 + 'src="/identifier/site/blog/posts/post-image.jpg"' 410 + ) 411 + 412 + // One level up 413 + expect(result).toContain('href="/identifier/site/blog/index.html"') 414 + 415 + // Two levels up 416 + expect(result).toContain('href="/identifier/site/index.html"') 417 + }) 418 + }) 419 + }) 420 + 421 + describe('isHtmlContent', () => { 422 + test('identifies HTML by content type', () => { 423 + expect(isHtmlContent('file.txt', 'text/html')).toBe(true) 424 + expect(isHtmlContent('file.txt', 'text/html; charset=utf-8')).toBe( 425 + true 426 + ) 427 + }) 428 + 429 + test('identifies HTML by .html extension', () => { 430 + expect(isHtmlContent('index.html')).toBe(true) 431 + expect(isHtmlContent('page.html', undefined)).toBe(true) 432 + expect(isHtmlContent('/path/to/file.html')).toBe(true) 433 + }) 434 + 435 + test('identifies HTML by .htm extension', () => { 436 + expect(isHtmlContent('index.htm')).toBe(true) 437 + expect(isHtmlContent('page.htm', undefined)).toBe(true) 438 + }) 439 + 440 + test('handles case-insensitive extensions', () => { 441 + expect(isHtmlContent('INDEX.HTML')).toBe(true) 442 + expect(isHtmlContent('page.HTM')).toBe(true) 443 + expect(isHtmlContent('File.HtMl')).toBe(true) 444 + }) 445 + 446 + test('returns false for non-HTML files', () => { 447 + expect(isHtmlContent('script.js')).toBe(false) 448 + expect(isHtmlContent('style.css')).toBe(false) 449 + 
expect(isHtmlContent('image.png')).toBe(false) 450 + expect(isHtmlContent('data.json')).toBe(false) 451 + }) 452 + 453 + test('returns false for files with no extension', () => { 454 + expect(isHtmlContent('README')).toBe(false) 455 + expect(isHtmlContent('Makefile')).toBe(false) 456 + }) 457 + })
+178 -99
hosting-service/src/lib/html-rewriter.ts
/**
 * HTML path rewriting for sites that are served from a sub-path
 * (for example `/identifier/site/`) instead of from the domain root.
 *
 * Matching is regex based (no HTML parsing): any `attr="value"` or
 * `attr='value'` occurrence of a rewritable attribute name is rewritten,
 * wherever it appears in the text.
 */

const REWRITABLE_ATTRIBUTES = [
	'src',
	'href',
	'action',
	'data',
	'poster',
	'srcset'
] as const

/**
 * Leading URI scheme as defined by RFC 3986 section 3.1
 * (`http:`, `https:`, `mailto:`, `data:`, `javascript:`, ...).
 *
 * Anchored at the start so that a colon appearing later in a reference
 * (e.g. `/img.png?t=12:30`) is not mistaken for a scheme.
 */
const URI_SCHEME_PATTERN = /^[a-z][a-z0-9+.-]*:/i

/**
 * Check if a path should be rewritten.
 *
 * Left untouched:
 *  - empty values
 *  - same-document references (`#anchor`, `?query`), which the browser
 *    resolves against the URL of the page currently being displayed
 *  - protocol-relative URLs (`//host/...`)
 *  - anything that starts with a URI scheme
 *
 * Everything else (absolute `/x`, `./x`, `../x`, plain `x`) is rewritten.
 */
function shouldRewritePath(path: string): boolean {
	if (!path) return false

	// Pure fragments / queries refer to the current document; prefixing them
	// would turn an in-page jump into a navigation to a different URL.
	if (path.startsWith('#') || path.startsWith('?')) return false

	// Protocol-relative URLs point at another host
	if (path.startsWith('//')) return false

	// External URLs and non-file schemes (http:, https:, data:, mailto:, ...)
	if (URI_SCHEME_PATTERN.test(path)) return false

	return true
}

/**
 * Normalize a path by resolving `.` and `..` segments and collapsing
 * empty segments. `..` never climbs above the root. A leading `/` is
 * preserved; a trailing `/` is not.
 */
function normalizePath(path: string): string {
	const segments: string[] = []

	for (const segment of path.split('/')) {
		if (segment === '' || segment === '.') continue
		if (segment === '..') {
			// pop() on an empty list is a no-op, which clamps at the root
			segments.pop()
			continue
		}
		segments.push(segment)
	}

	const joined = segments.join('/')
	return path.startsWith('/') ? '/' + joined : joined
}

/**
 * Get the directory path from a file path
 * e.g., "folder1/folder2/file.html" -> "folder1/folder2/"
 * Returns '' when the path contains no slash.
 */
function getDirectory(filepath: string): string {
	const lastSlash = filepath.lastIndexOf('/')
	return lastSlash === -1 ? '' : filepath.substring(0, lastSlash + 1)
}

/**
 * Rewrite a single URL reference so that it resolves under `basePath`.
 *
 * @param path - attribute value as written in the document
 * @param basePath - site prefix, expected to end with `/`
 * @param documentPath - path of the HTML document inside the site, used to
 *   resolve relative references (with or without a leading `/`)
 * @returns the rewritten reference, or `path` unchanged when it must not be
 *   rewritten (see `shouldRewritePath`)
 */
function rewritePath(
	path: string,
	basePath: string,
	documentPath: string
): string {
	if (!shouldRewritePath(path)) {
		return path
	}

	// Only the path component takes part in resolution; the query string and
	// fragment are carried over verbatim so their contents are never
	// "normalized" (e.g. `?next=../x` must stay as written).
	const suffixStart = path.search(/[?#]/)
	const pathname = suffixStart === -1 ? path : path.slice(0, suffixStart)
	const suffix = suffixStart === -1 ? '' : path.slice(suffixStart)

	// Handle absolute paths: /file.js -> /base/file.js
	if (pathname.startsWith('/')) {
		return basePath + pathname.slice(1) + suffix
	}

	// Handle relative paths (./x, ../x, x) by resolving against the
	// directory of the document that contains the reference.
	const resolved = normalizePath(getDirectory(documentPath) + pathname)

	// documentPath may carry a leading slash; basePath already ends with one,
	// so drop it to avoid emitting `//`.
	const relative = resolved.startsWith('/') ? resolved.slice(1) : resolved

	// Keep a trailing slash on directory references such as `./docs/`
	const trailingSlash =
		pathname.endsWith('/') && relative !== '' && !relative.endsWith('/')
			? '/'
			: ''

	return basePath + relative + trailingSlash + suffix
}

/**
 * True for the ASCII whitespace characters that separate srcset tokens.
 */
function isSrcsetWhitespace(char: string): boolean {
	return (
		char === ' ' ||
		char === '\t' ||
		char === '\n' ||
		char === '\r' ||
		char === '\f'
	)
}

/**
 * Rewrite srcset attribute (can contain multiple URLs)
 * Format: "url1 1x, url2 2x" or "url1 100w, url2 200w"
 *
 * Candidates are tokenized rather than split on every comma: a URL runs up
 * to the next whitespace, so commas inside a URL (notably `data:` URIs) do
 * not break it apart. The optional descriptor runs up to the next comma.
 * Candidates are emitted joined by ", ".
 */
function rewriteSrcset(
	srcset: string,
	basePath: string,
	documentPath: string
): string {
	const candidates: string[] = []
	const length = srcset.length
	let index = 0

	while (index < length) {
		// Skip separators between candidates
		while (
			index < length &&
			(srcset.charAt(index) === ',' ||
				isSrcsetWhitespace(srcset.charAt(index)))
		) {
			index++
		}
		if (index >= length) break

		// URL: everything up to the next whitespace
		const urlStart = index
		while (index < length && !isSrcsetWhitespace(srcset.charAt(index))) {
			index++
		}
		let urlEnd = index
		let descriptor = ''

		if (srcset.charAt(urlEnd - 1) === ',') {
			// "url, next": trailing commas end a candidate without descriptor
			while (urlEnd > urlStart && srcset.charAt(urlEnd - 1) === ',') {
				urlEnd--
			}
		} else {
			// Descriptor: everything up to the next comma
			const descriptorStart = index
			while (index < length && srcset.charAt(index) !== ',') {
				index++
			}
			descriptor = srcset.slice(descriptorStart, index).trim()
		}

		const rewrittenUrl = rewritePath(
			srcset.slice(urlStart, urlEnd),
			basePath,
			documentPath
		)
		candidates.push(
			descriptor ? `${rewrittenUrl} ${descriptor}` : rewrittenUrl
		)
	}

	return candidates.join(', ')
}

/**
 * Rewrite absolute and relative paths in HTML content
 * Uses simple regex matching for safety (no full HTML parsing)
 *
 * @param html - HTML source text
 * @param basePath - prefix the site is served under; a trailing `/` is
 *   added when missing
 * @param documentPath - path of this document inside the site
 *   (e.g. `blog/posts/my-post.html`), used to resolve relative references
 * @returns the HTML with every quoted rewritable attribute value rewritten.
 *   Matched attributes are re-emitted as `name=<quote>value<quote>` with the
 *   lower-case attribute name and the original quote character.
 */
export function rewriteHtmlPaths(
	html: string,
	basePath: string,
	documentPath: string
): string {
	// Ensure base path ends with /
	const normalizedBase = basePath.endsWith('/') ? basePath : basePath + '/'

	let rewritten = html

	for (const attr of REWRITABLE_ATTRIBUTES) {
		// srcset holds a list of URLs; every other attribute holds one
		const rewriteValue = attr === 'srcset' ? rewriteSrcset : rewritePath

		// Double quotes first, then single quotes (same order for every
		// attribute, including srcset).
		for (const quote of ['"', "'"]) {
			// Whitespace around `=` is bounded (max 5) and the value class
			// excludes the quote, so the pattern cannot backtrack
			// catastrophically.
			const pattern = new RegExp(
				`\\b${attr}[ \\t]{0,5}=[ \\t]{0,5}${quote}([^${quote}]*)${quote}`,
				'gi'
			)

			// A replacer function is used so `$` in values is never treated
			// as a replacement pattern.
			rewritten = rewritten.replace(
				pattern,
				(_match: string, value: string) =>
					`${attr}=${quote}${rewriteValue(
						value,
						normalizedBase,
						documentPath
					)}${quote}`
			)
		}
	}

	return rewritten
}

/**
 * Check if content is HTML based on content type or filename
 *
 * @param filepath - file name or path; matched on a trailing `.html` / `.htm`
 *   extension, case-insensitively
 * @param contentType - optional Content-Type value; any value containing
 *   `text/html` (case-insensitively) counts as HTML
 */
export function isHtmlContent(filepath: string, contentType?: string): boolean {
	if (contentType && contentType.toLowerCase().includes('text/html')) {
		return true
	}

	// Require an actual extension: a file literally named "html" is not HTML
	return /\.html?$/i.test(filepath)
}
+3 -2
hosting-service/src/server.ts
··· 156 156 } else { 157 157 content = readFileSync(cachedFile, 'utf-8'); 158 158 } 159 - const rewritten = rewriteHtmlPaths(content, basePath); 159 + const rewritten = rewriteHtmlPaths(content, basePath, requestPath); 160 160 161 161 // Recompress the HTML for efficient delivery 162 162 const { gzipSync } = await import('zlib'); ··· 224 224 } else { 225 225 content = readFileSync(indexFile, 'utf-8'); 226 226 } 227 - const rewritten = rewriteHtmlPaths(content, basePath); 227 + const indexPath = `${requestPath}/index.html`; 228 + const rewritten = rewriteHtmlPaths(content, basePath, indexPath); 228 229 229 230 // Recompress the HTML for efficient delivery 230 231 const { gzipSync } = await import('zlib');