Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol. wisp.place

handle compression better

Changed files
+90 -59
hosting-service
+63 -25
hosting-service/src/lib/utils.ts
··· 15 rkey: string; 16 } 17 18 interface IpldLink { 19 $link: string; 20 } ··· 270 271 console.log(`[DEBUG] ${filePath}: fetched ${content.length} bytes, base64=${base64}, encoding=${encoding}, mimeType=${mimeType}`); 272 273 - // If content is base64-encoded, decode it back to binary (gzipped or not) 274 if (base64) { 275 const originalSize = content.length; 276 - // The content from the blob is base64 text, decode it directly to binary 277 - const buffer = Buffer.from(content); 278 - const base64String = buffer.toString('ascii'); // Use ascii for base64 text, not utf-8 279 - console.log(`[DEBUG] ${filePath}: base64 string first 100 chars: ${base64String.substring(0, 100)}`); 280 content = Buffer.from(base64String, 'base64'); 281 - console.log(`[DEBUG] ${filePath}: decoded from ${originalSize} bytes to ${content.length} bytes`); 282 283 // Check if it's actually gzipped by looking at magic bytes 284 if (content.length >= 2) { 285 - const magic = content[0] === 0x1f && content[1] === 0x8b; 286 - const byte0 = content[0]; 287 - const byte1 = content[1]; 288 - console.log(`[DEBUG] ${filePath}: has gzip magic bytes: ${magic} (0x${byte0?.toString(16)}, 0x${byte1?.toString(16)})`); 289 } 290 } 291 ··· 296 mkdirSync(fileDir, { recursive: true }); 297 } 298 299 - // Determine if this is a web asset that should remain compressed 300 - const webAssetTypes = [ 301 - 'text/html', 'text/css', 'application/javascript', 'text/javascript', 302 - 'application/json', 'text/xml', 'application/xml' 303 - ]; 304 - 305 - const isWebAsset = mimeType && webAssetTypes.some(type => 306 - mimeType.toLowerCase().startsWith(type) || mimeType.toLowerCase() === type 307 - ); 308 309 - // Decompress non-web assets that are gzipped 310 - if (encoding === 'gzip' && !isWebAsset && content.length >= 2 && 311 content[0] === 0x1f && content[1] === 0x8b) { 312 - console.log(`[DEBUG] ${filePath}: decompressing non-web asset (${mimeType}) before caching`); 313 try { 314 const { gunzipSync } = await 
import('zlib'); 315 const decompressed = gunzipSync(content); ··· 318 // Clear the encoding flag since we're storing decompressed 319 encoding = undefined; 320 } catch (error) { 321 - console.log(`[DEBUG] ${filePath}: failed to decompress, storing original gzipped content`); 322 } 323 } 324 325 await writeFile(cacheFile, content); 326 327 - // Store metadata only if file is still compressed (web assets) 328 if (encoding === 'gzip' && mimeType) { 329 const metaFile = `${cacheFile}.meta`; 330 await writeFile(metaFile, JSON.stringify({ encoding, mimeType })); 331 console.log('Cached file', filePath, content.length, 'bytes (gzipped,', mimeType + ')'); 332 } else { 333 - console.log('Cached file', filePath, content.length, 'bytes (decompressed)'); 334 } 335 } 336
··· 15 rkey: string; 16 } 17 18 + /** 19 + * Determines if a MIME type should benefit from gzip compression. 20 + * Returns true for text-based web assets (HTML, CSS, JS, JSON, XML, SVG). 21 + * Returns false for already-compressed formats (images, video, audio, PDFs). 22 + * 23 + */ 24 + export function shouldCompressMimeType(mimeType: string | undefined): boolean { 25 + if (!mimeType) return false; 26 + 27 + const mime = mimeType.toLowerCase(); 28 + 29 + // Text-based web assets that benefit from compression 30 + const compressibleTypes = [ 31 + 'text/html', 32 + 'text/css', 33 + 'text/javascript', 34 + 'application/javascript', 35 + 'application/x-javascript', 36 + 'text/xml', 37 + 'application/xml', 38 + 'application/json', 39 + 'text/plain', 40 + 'image/svg+xml', 41 + ]; 42 + 43 + if (compressibleTypes.some(type => mime === type || mime.startsWith(type))) { 44 + return true; 45 + } 46 + 47 + // Already-compressed formats that should NOT be double-compressed 48 + const alreadyCompressedPrefixes = [ 49 + 'video/', 50 + 'audio/', 51 + 'image/', 52 + 'application/pdf', 53 + 'application/zip', 54 + 'application/gzip', 55 + ]; 56 + 57 + if (alreadyCompressedPrefixes.some(prefix => mime.startsWith(prefix))) { 58 + return false; 59 + } 60 + 61 + // Default to not compressing for unknown types 62 + return false; 63 + } 64 + 65 interface IpldLink { 66 $link: string; 67 } ··· 317 318 console.log(`[DEBUG] ${filePath}: fetched ${content.length} bytes, base64=${base64}, encoding=${encoding}, mimeType=${mimeType}`); 319 320 + // If content is base64-encoded, decode it back to raw binary (gzipped or not) 321 if (base64) { 322 const originalSize = content.length; 323 + // Decode base64 directly from raw bytes - no string conversion 324 + // The blob contains base64-encoded text as raw bytes, decode it in-place 325 + const textDecoder = new TextDecoder(); 326 + const base64String = textDecoder.decode(content); 327 content = Buffer.from(base64String, 'base64'); 328 + 
console.log(`[DEBUG] ${filePath}: decoded base64 from ${originalSize} bytes to ${content.length} bytes`); 329 330 // Check if it's actually gzipped by looking at magic bytes 331 if (content.length >= 2) { 332 + const hasGzipMagic = content[0] === 0x1f && content[1] === 0x8b; 333 + console.log(`[DEBUG] ${filePath}: has gzip magic bytes: ${hasGzipMagic}`); 334 } 335 } 336 ··· 341 mkdirSync(fileDir, { recursive: true }); 342 } 343 344 + // Use the shared function to determine if this should remain compressed 345 + const shouldStayCompressed = shouldCompressMimeType(mimeType); 346 347 + // Decompress files that shouldn't be stored compressed 348 + if (encoding === 'gzip' && !shouldStayCompressed && content.length >= 2 && 349 content[0] === 0x1f && content[1] === 0x8b) { 350 + console.log(`[DEBUG] ${filePath}: decompressing non-compressible type (${mimeType}) before caching`); 351 try { 352 const { gunzipSync } = await import('zlib'); 353 const decompressed = gunzipSync(content); ··· 356 // Clear the encoding flag since we're storing decompressed 357 encoding = undefined; 358 } catch (error) { 359 + console.log(`[DEBUG] ${filePath}: failed to decompress, storing original gzipped content. Error:`, error); 360 } 361 } 362 363 await writeFile(cacheFile, content); 364 365 + // Store metadata only if file is still compressed 366 if (encoding === 'gzip' && mimeType) { 367 const metaFile = `${cacheFile}.meta`; 368 await writeFile(metaFile, JSON.stringify({ encoding, mimeType })); 369 console.log('Cached file', filePath, content.length, 'bytes (gzipped,', mimeType + ')'); 370 } else { 371 + console.log('Cached file', filePath, content.length, 'bytes'); 372 } 373 } 374
+27 -34
hosting-service/src/server.ts
··· 1 import { Hono } from 'hono'; 2 import { getWispDomain, getCustomDomain, getCustomDomainByHash } from './lib/db'; 3 - import { resolveDid, getPdsForDid, fetchSiteRecord, downloadAndCacheSite, getCachedFilePath, isCached, sanitizePath } from './lib/utils'; 4 import { rewriteHtmlPaths, isHtmlContent } from './lib/html-rewriter'; 5 import { existsSync, readFileSync } from 'fs'; 6 import { lookup } from 'mime-types'; ··· 45 // Check actual content for gzip magic bytes 46 if (content.length >= 2) { 47 const hasGzipMagic = content[0] === 0x1f && content[1] === 0x8b; 48 - const byte0 = content[0]; 49 - const byte1 = content[1]; 50 - console.log(`[DEBUG SERVE] ${requestPath}: has gzip magic bytes=${hasGzipMagic} (0x${byte0?.toString(16)}, 0x${byte1?.toString(16)})`); 51 } 52 53 if (meta.encoding === 'gzip' && meta.mimeType) { 54 - // Don't serve already-compressed media formats with Content-Encoding: gzip 55 - // These formats (video, audio, images) are already compressed and the browser 56 - // can't decode them if we add another layer of compression 57 - const alreadyCompressedTypes = [ 58 - 'video/', 'audio/', 'image/jpeg', 'image/jpg', 'image/png', 59 - 'image/gif', 'image/webp', 'application/pdf' 60 - ]; 61 62 - const isAlreadyCompressed = alreadyCompressedTypes.some(type => 63 - meta.mimeType.toLowerCase().startsWith(type) 64 - ); 65 - 66 - if (isAlreadyCompressed) { 67 - // Decompress the file before serving 68 - console.log(`[DEBUG SERVE] ${requestPath}: decompressing already-compressed media type`); 69 const { gunzipSync } = await import('zlib'); 70 const decompressed = gunzipSync(content); 71 console.log(`[DEBUG SERVE] ${requestPath}: decompressed from ${content.length} to ${decompressed.length} bytes`); ··· 157 } 158 159 // Check if this is HTML content that needs rewriting 160 - // Note: For gzipped HTML with path rewriting, we need to decompress, rewrite, and serve uncompressed 161 - // This is a trade-off for the sites.wisp.place domain which needs path 
rewriting 162 if (isHtmlContent(requestPath, mimeType)) { 163 let content: string; 164 if (isGzipped) { ··· 169 content = readFileSync(cachedFile, 'utf-8'); 170 } 171 const rewritten = rewriteHtmlPaths(content, basePath); 172 - return new Response(rewritten, { 173 headers: { 174 'Content-Type': 'text/html; charset=utf-8', 175 }, 176 }); 177 } ··· 179 // Non-HTML files: serve gzipped content as-is with proper headers 180 const content = readFileSync(cachedFile); 181 if (isGzipped) { 182 - // Don't serve already-compressed media formats with Content-Encoding: gzip 183 - const alreadyCompressedTypes = [ 184 - 'video/', 'audio/', 'image/jpeg', 'image/jpg', 'image/png', 185 - 'image/gif', 'image/webp', 'application/pdf' 186 - ]; 187 - 188 - const isAlreadyCompressed = alreadyCompressedTypes.some(type => 189 - mimeType.toLowerCase().startsWith(type) 190 - ); 191 192 - if (isAlreadyCompressed) { 193 - // Decompress the file before serving 194 const { gunzipSync } = await import('zlib'); 195 const decompressed = gunzipSync(content); 196 return new Response(decompressed, { ··· 228 } 229 } 230 231 - // HTML needs path rewriting, so decompress if needed 232 let content: string; 233 if (isGzipped) { 234 const { gunzipSync } = await import('zlib'); ··· 238 content = readFileSync(indexFile, 'utf-8'); 239 } 240 const rewritten = rewriteHtmlPaths(content, basePath); 241 - return new Response(rewritten, { 242 headers: { 243 'Content-Type': 'text/html; charset=utf-8', 244 }, 245 }); 246 }
··· 1 import { Hono } from 'hono'; 2 import { getWispDomain, getCustomDomain, getCustomDomainByHash } from './lib/db'; 3 + import { resolveDid, getPdsForDid, fetchSiteRecord, downloadAndCacheSite, getCachedFilePath, isCached, sanitizePath, shouldCompressMimeType } from './lib/utils'; 4 import { rewriteHtmlPaths, isHtmlContent } from './lib/html-rewriter'; 5 import { existsSync, readFileSync } from 'fs'; 6 import { lookup } from 'mime-types'; ··· 45 // Check actual content for gzip magic bytes 46 if (content.length >= 2) { 47 const hasGzipMagic = content[0] === 0x1f && content[1] === 0x8b; 48 + console.log(`[DEBUG SERVE] ${requestPath}: has gzip magic bytes=${hasGzipMagic}`); 49 } 50 51 if (meta.encoding === 'gzip' && meta.mimeType) { 52 + // Use shared function to determine if this should be served compressed 53 + const shouldServeCompressed = shouldCompressMimeType(meta.mimeType); 54 55 + if (!shouldServeCompressed) { 56 + // This shouldn't happen if caching is working correctly, but handle it gracefully 57 + console.log(`[DEBUG SERVE] ${requestPath}: decompressing file that shouldn't be compressed (${meta.mimeType})`); 58 const { gunzipSync } = await import('zlib'); 59 const decompressed = gunzipSync(content); 60 console.log(`[DEBUG SERVE] ${requestPath}: decompressed from ${content.length} to ${decompressed.length} bytes`); ··· 146 } 147 148 // Check if this is HTML content that needs rewriting 149 + // We decompress, rewrite paths, then recompress for efficient delivery 150 if (isHtmlContent(requestPath, mimeType)) { 151 let content: string; 152 if (isGzipped) { ··· 157 content = readFileSync(cachedFile, 'utf-8'); 158 } 159 const rewritten = rewriteHtmlPaths(content, basePath); 160 + 161 + // Recompress the HTML for efficient delivery 162 + const { gzipSync } = await import('zlib'); 163 + const recompressed = gzipSync(Buffer.from(rewritten, 'utf-8')); 164 + 165 + return new Response(recompressed, { 166 headers: { 167 'Content-Type': 'text/html; charset=utf-8', 
168 + 'Content-Encoding': 'gzip', 169 }, 170 }); 171 } ··· 173 // Non-HTML files: serve gzipped content as-is with proper headers 174 const content = readFileSync(cachedFile); 175 if (isGzipped) { 176 + // Use shared function to determine if this should be served compressed 177 + const shouldServeCompressed = shouldCompressMimeType(mimeType); 178 179 + if (!shouldServeCompressed) { 180 + // This shouldn't happen if caching is working correctly, but handle it gracefully 181 const { gunzipSync } = await import('zlib'); 182 const decompressed = gunzipSync(content); 183 return new Response(decompressed, { ··· 215 } 216 } 217 218 + // HTML needs path rewriting, decompress, rewrite, then recompress 219 let content: string; 220 if (isGzipped) { 221 const { gunzipSync } = await import('zlib'); ··· 225 content = readFileSync(indexFile, 'utf-8'); 226 } 227 const rewritten = rewriteHtmlPaths(content, basePath); 228 + 229 + // Recompress the HTML for efficient delivery 230 + const { gzipSync } = await import('zlib'); 231 + const recompressed = gzipSync(Buffer.from(rewritten, 'utf-8')); 232 + 233 + return new Response(recompressed, { 234 headers: { 235 'Content-Type': 'text/html; charset=utf-8', 236 + 'Content-Encoding': 'gzip', 237 }, 238 }); 239 }