streaming zip archiver/extractor jsr.io/@mary/zip
typescript jsr
at trunk 10 kB view raw
import { getDayOfMonth, getHours, getMinutes, getMonth, getSeconds, getYear } from '@mary/date-fns';
import { textEncoder } from './utils/buffer.ts';

/**
 * file attributes for zip entries
 */
export interface ZipFileAttributes {
	/** file permissions mode */
	mode?: number;
	/** user id of the file owner */
	uid?: number;
	/** group id of the file owner */
	gid?: number;
	/** modification time as unix timestamp */
	mtime?: number;
	/** owner username */
	owner?: string;
	/** group name */
	group?: string;
}

/**
 * represents a single entry in a zip archive
 */
export interface ZipEntry {
	/** path and name of the file in the zip archive */
	filename: string;
	/** file content as string, bytes, or stream */
	data: string | Uint8Array | ReadableStream<Uint8Array>;
	/** file attributes like permissions and timestamps */
	attrs?: ZipFileAttributes;
	/** whether to compress the file data */
	compress?: false | 'deflate';
}

/** shared fallback used when an entry does not specify `attrs` */
const DEFAULT_ATTRS: ZipFileAttributes = {};

/** the file name length field in zip headers is an unsigned 16-bit integer */
const MAX_FILENAME_BYTES = 0xffff;

// deno-lint-ignore no-control-regex
const INVALID_FILENAME_CHARS = /[<>:"|?*\x00-\x1f]/;
const INVALID_FILENAME_TRAVERSAL = /(?:^|[/\\])\.\.(?:[/\\]|$)/;
// deno-lint-ignore no-control-regex
const NON_ASCII_CHARS = /[^\x00-\x7f]/;

/**
 * encodes `str` as UTF-8 directly into the buffer backing `view`
 * @param view view over the destination buffer
 * @param offset byte offset relative to the start of `view`
 * @param length number of bytes reserved for the string
 * @param str string to encode
 */
function writeUtf8String(view: DataView, offset: number, length: number, str: string) {
	const u8 = new Uint8Array(view.buffer, view.byteOffset + offset, length);
	textEncoder.encodeInto(str, u8);
}

/** writes `value` as a little-endian unsigned 32-bit integer at `offset` */
function writeUint32LE(view: DataView, offset: number, value: number) {
	view.setUint32(offset, value, true);
}

/** writes `value` as a little-endian unsigned 16-bit integer at `offset` */
function writeUint16LE(view: DataView, offset: number, value: number) {
	view.setUint16(offset, value, true);
}

/** byte-indexed lookup table for the reflected CRC-32 polynomial 0xedb88320 */
const CRC32_TABLE = /*#__PURE__*/ (() => {
	const table = new Int32Array(256);

	for (let i = 0; i < 256; ++i) {
		let c = i;

		// one shift/xor step per bit of the index byte
		for (let bit = 0; bit < 8; ++bit) {
			c = (c & 1 ? 0xedb88320 : 0) ^ (c >>> 1);
		}

		table[i] = c;
	}

	return table;
})();

/**
 * computes a running CRC-32 checksum.
 *
 * both the input `crc` and the return value are *finalized* checksums, so the
 * result of one call can be passed straight into the next call to continue the
 * checksum over multiple chunks: `crc = crc32(b, crc32(a))`.
 *
 * @param chunk bytes to add to the checksum
 * @param crc checksum of the data seen so far (0 when nothing has been seen yet)
 * @returns checksum of the previous data followed by `chunk`, as an unsigned 32-bit integer
 */
function crc32(chunk: Uint8Array, crc: number = 0): number {
	// undo the final inversion of the previous call to recover the raw register
	crc = ~crc;

	for (let idx = 0, len = chunk.length; idx < len; idx++) {
		crc = CRC32_TABLE[(crc ^ chunk[idx]) & 0xff] ^ (crc >>> 8);
	}

	return ~crc >>> 0;
}

/**
 * converts a unix timestamp into the packed MS-DOS time and date fields used by zip headers
 *
 * NOTE(review): the time zone depends on how the `@mary/date-fns` getters read
 * the `Date` (presumably local time) — confirm. years before 1980 do not fit
 * the 7-bit year field and are not clamped here.
 *
 * @param unixTimestamp seconds since the unix epoch
 * @returns 16-bit `time` and `date` fields
 */
function unixToDosTime(unixTimestamp: number): { time: number; date: number } {
	const date = new Date(unixTimestamp * 1000);

	// bits 0-4: seconds / 2, bits 5-10: minutes, bits 11-15: hours
	const dosTime = ((getSeconds(date) >> 1) & 0x1f) |
		((getMinutes(date) & 0x3f) << 5) |
		((getHours(date) & 0x1f) << 11);

	// bits 0-4: day, bits 5-8: month (1-based), bits 9-15: years since 1980
	const dosDate = (getDayOfMonth(date) & 0x1f) |
		(((getMonth(date) + 1) & 0x0f) << 5) |
		(((getYear(date) - 1980) & 0x7f) << 9);

	return { time: dosTime, date: dosDate };
}

/**
 * rejects filenames that cannot be stored safely in an archive
 * @param filename entry name to validate
 * @param byteLength UTF-8 encoded length of `filename`; computed when omitted
 * @throws {Error} if the name is empty, longer than 65535 bytes once encoded,
 * contains a `..` path segment, starts with `/`, or contains reserved characters
 */
function validateFilename(
	filename: string,
	byteLength: number = textEncoder.encode(filename).length,
): void {
	if (filename.length === 0) {
		throw new Error(`invalid filename: cannot be empty`);
	}

	// the limit applies to the encoded bytes written to the header, not to
	// UTF-16 code units; non-ASCII names can be up to 3x larger once encoded
	if (byteLength > MAX_FILENAME_BYTES) {
		throw new Error(`invalid filename: too long (max 65535 bytes)`);
	}

	if (INVALID_FILENAME_TRAVERSAL.test(filename)) {
		throw new Error(`invalid filename: contains path traversal`);
	}

	if (filename.startsWith('/')) {
		throw new Error(`invalid filename: is an absolute path`);
	}

	if (INVALID_FILENAME_CHARS.test(filename)) {
		throw new Error('invalid filename: contains invalid characters');
	}
}

/** returns whether `filename` contains any character outside the ASCII range */
function isNonAscii(filename: string): boolean {
	return NON_ASCII_CHARS.test(filename);
}

/**
 * creates a zip archive from entries and yields chunks as Uint8Array
 *
 * every entry is written with a trailing data descriptor, so sizes and
 * checksums do not need to be known before the entry's data is streamed out.
 *
 * NOTE: ZIP64 is not implemented; sizes/offsets are written as 32-bit fields
 * and the record count as a 16-bit field.
 *
 * @param entries iterable of zip entries to include in the archive
 * @returns async generator that yields zip file chunks
 * @throws {Error} if an entry has an invalid filename
 */
export async function* zip(
	entries: Iterable<ZipEntry> | AsyncIterable<ZipEntry>,
): AsyncGenerator<Uint8Array> {
	// central directory records, buffered until all entries have been written
	const listing: Uint8Array[] = [];
	// number of bytes yielded so far
	let offset = 0;

	for await (const { filename, data, compress = 'deflate', attrs = DEFAULT_ATTRS } of entries) {
		const fname = textEncoder.encode(filename);
		const fnameLen = fname.length;

		validateFilename(filename, fnameLen);

		const startOffset = offset;

		const mtimeSeconds = attrs?.mtime ?? Math.floor(Date.now() / 1000);
		const { time: dosTime, date: dosDate } = unixToDosTime(mtimeSeconds);

		// 0 = stored, 8 = deflate
		const method = compress === 'deflate' ? 8 : 0;

		// bit 3: crc and sizes follow the data in a data descriptor
		let flags = 0x0008;
		if (isNonAscii(filename)) {
			// bit 11: file name is encoded as UTF-8
			flags |= 0x0800;
		}

		// running checksum of the uncompressed data; 0 is the crc of no data
		let crc = 0;
		let uncompressedSize = 0;
		let compressedSize = 0;

		// local header
		{
			const header = new ArrayBuffer(30 + fnameLen);
			const view = new DataView(header);

			writeUint32LE(view, 0, 0x04034b50); // local file header signature
			writeUint16LE(view, 4, 20); // version needed to extract (2.0)
			writeUint16LE(view, 6, flags); // general purpose bit flag
			writeUint16LE(view, 8, method); // compression method (0=stored, 8=deflate)
			writeUint16LE(view, 10, dosTime); // last mod file time (DOS format)
			writeUint16LE(view, 12, dosDate); // last mod file date (DOS format)
			writeUint32LE(view, 14, 0); // crc-32 (set to 0, actual value in data descriptor)
			writeUint32LE(view, 18, 0); // compressed size (set to 0, actual value in data descriptor)
			writeUint32LE(view, 22, 0); // uncompressed size (set to 0, actual value in data descriptor)
			writeUint16LE(view, 26, fnameLen); // file name length
			writeUint16LE(view, 28, 0); // extra field length

			writeUtf8String(view, 30, fnameLen, filename);

			offset += 30 + fnameLen;

			yield new Uint8Array(header);
		}

		// data
		if (compress === 'deflate') {
			let stream: ReadableStream<Uint8Array>;
			if (data instanceof ReadableStream) {
				// observe the uncompressed bytes on their way into the compressor
				stream = data.pipeThrough(
					new TransformStream({
						transform(chunk, controller) {
							uncompressedSize += chunk.length;
							crc = crc32(chunk, crc);

							controller.enqueue(chunk);
						},
					}),
				);
			} else {
				const chunk = typeof data === 'string' ? textEncoder.encode(data) : data;

				uncompressedSize = chunk.length;
				crc = crc32(chunk);

				stream = new ReadableStream({
					start(controller) {
						controller.enqueue(chunk);
						controller.close();
					},
				});
			}

			// @ts-expect-error: I don't knowwww
			yield* stream.pipeThrough(new CompressionStream('deflate-raw')).pipeThrough(
				new TransformStream({
					transform(chunk, controller) {
						controller.enqueue(chunk);

						compressedSize += chunk.length;
					},
				}),
			);
		} else {
			if (data instanceof ReadableStream) {
				yield* data.pipeThrough(
					new TransformStream({
						transform(chunk, controller) {
							uncompressedSize += chunk.length;
							crc = crc32(chunk, crc);

							controller.enqueue(chunk);
						},
					}),
				);

				compressedSize = uncompressedSize;
			} else {
				const chunk = typeof data === 'string' ? textEncoder.encode(data) : data;

				uncompressedSize = chunk.length;
				compressedSize = uncompressedSize;
				crc = crc32(chunk);

				yield chunk;
			}
		}

		offset += compressedSize;

		// data descriptor
		{
			const descriptor = new ArrayBuffer(16);
			const view = new DataView(descriptor);

			writeUint32LE(view, 0, 0x08074b50); // data descriptor signature
			writeUint32LE(view, 4, crc); // CRC32 of uncompressed data
			writeUint32LE(view, 8, compressedSize); // compressed size
			writeUint32LE(view, 12, uncompressedSize); // uncompressed size

			offset += 16;

			yield new Uint8Array(descriptor);
		}

		// central directory record
		{
			const record = new ArrayBuffer(46 + fnameLen);
			const view = new DataView(record);

			const mode = attrs?.mode ?? 0o100644;
			// the file mode is stored in the upper 16 bits of the external attributes
			const externalAttrs = (mode & 0xffff) << 16;

			writeUint32LE(view, 0, 0x02014b50); // central directory record signature
			writeUint16LE(view, 4, (3 << 8) | 20); // version used to create this archive (host 3, version 2.0)
			writeUint16LE(view, 6, 20); // minimum required version for extraction
			writeUint16LE(view, 8, flags); // general purpose bitflag
			writeUint16LE(view, 10, method); // compression method
			writeUint16LE(view, 12, dosTime); // file last modification time
			writeUint16LE(view, 14, dosDate); // file last modification date
			writeUint32LE(view, 16, crc); // CRC32 of uncompressed data
			writeUint32LE(view, 20, compressedSize); // compressed size
			writeUint32LE(view, 24, uncompressedSize); // uncompressed size
			writeUint16LE(view, 28, fnameLen); // file name length
			writeUint16LE(view, 30, 0); // extra fields length
			writeUint16LE(view, 32, 0); // file comment length
			writeUint16LE(view, 34, 0); // disk number containing start of file
			writeUint16LE(view, 36, 0); // internal file attributes
			writeUint32LE(view, 38, externalAttrs); // external file attributes
			writeUint32LE(view, 42, startOffset); // offset to start of entry

			writeUtf8String(view, 46, fnameLen, filename);

			listing.push(new Uint8Array(record));
		}
	}

	// central directory
	{
		const startCentralOffset = offset;
		const recordCount = listing.length;

		let centralSize = 0;

		for (const record of listing) {
			const recordLen = record.length;

			offset += recordLen;
			centralSize += recordLen;

			yield record;
		}

		// end of central directory
		{
			const directory = new ArrayBuffer(22);
			const view = new DataView(directory);

			writeUint32LE(view, 0, 0x06054b50); // end of central directory signature
			writeUint16LE(view, 4, 0); // this disk's number
			writeUint16LE(view, 6, 0); // disk number containing start of central directory
			writeUint16LE(view, 8, recordCount); // amount of records in this disk's central directory
			writeUint16LE(view, 10, recordCount); // total amount of central directory records
			writeUint32LE(view, 12, centralSize); // total size of this disk's central directory records
			writeUint32LE(view, 16, startCentralOffset); // offset of this disk's central directory records
			writeUint16LE(view, 20, 0); // comment length

			yield new Uint8Array(directory);
		}
	}
}