// streaming zip archiver/extractor
// package: jsr.io/@mary/zip (typescript, published on jsr)
1import { getDayOfMonth, getHours, getMinutes, getMonth, getSeconds, getYear } from '@mary/date-fns';
2import { textEncoder } from './utils/buffer.ts';
3
/**
 * optional metadata that can accompany an entry in the archive
 */
export interface ZipFileAttributes {
  /** permission/mode bits of the file */
  mode?: number;
  /** numeric id of the owning user */
  uid?: number;
  /** numeric id of the owning group */
  gid?: number;
  /** last modification time, as a unix timestamp in seconds */
  mtime?: number;
  /** name of the owning user */
  owner?: string;
  /** name of the owning group */
  group?: string;
}
21
/**
 * one file to be written into a zip archive
 */
export interface ZipEntry {
  /** path (including name) the file gets inside the archive */
  filename: string;
  /** contents of the file: a string, raw bytes, or a stream of byte chunks */
  data: string | Uint8Array | ReadableStream<Uint8Array>;
  /** optional metadata such as permissions and modification time */
  attrs?: ZipFileAttributes;
  /** `false` stores the data as-is; `'deflate'` (used when omitted) compresses it */
  compress?: false | 'deflate';
}
35
/** shared empty attribute set used for entries that do not provide `attrs` */
const DEFAULT_ATTRS: ZipFileAttributes = {};
37
/** matches `<`, `>`, `:`, `"`, `|`, `?`, `*` and the control characters 0x00-0x1f */
// deno-lint-ignore no-control-regex
const INVALID_FILENAME_CHARS = /[<>:"|?*\x00-\x1f]/;
/** matches a `..` path segment delimited by `/`, `\` or the ends of the string */
const INVALID_FILENAME_TRAVERSAL = /(?:^|[/\\])\.\.(?:[/\\]|$)/;
/** matches any character above 0x7f */
// deno-lint-ignore no-control-regex
const NON_ASCII_CHARS = /[^\x00-\x7f]/;
43
/**
 * encodes `str` directly into the memory that backs `view`
 * @param view data view whose underlying buffer receives the bytes
 * @param offset byte position of the destination, relative to the start of the view
 * @param length size of the destination window in bytes
 * @param str text to encode
 */
function writeUtf8String(view: DataView, offset: number, length: number, str: string) {
  const { buffer, byteOffset } = view;
  const destination = new Uint8Array(buffer, byteOffset + offset, length);

  textEncoder.encodeInto(str, destination);
}
48
/**
 * stores `value` as an unsigned 32-bit little-endian integer
 * @param view data view to write into
 * @param offset byte position within the view
 * @param value number to store
 */
function writeUint32LE(view: DataView, offset: number, value: number) {
  const littleEndian = true;

  view.setUint32(offset, value, littleEndian);
}
/**
 * stores `value` as an unsigned 16-bit little-endian integer
 * @param view data view to write into
 * @param offset byte position within the view
 * @param value number to store
 */
function writeUint16LE(view: DataView, offset: number, value: number) {
  const littleEndian = true;

  view.setUint16(offset, value, littleEndian);
}
55
/**
 * lookup table for byte-at-a-time crc-32 using the reflected polynomial 0xedb88320;
 * entries are kept as signed 32-bit integers
 */
const CRC32_TABLE = /*#__PURE__*/ (() => {
  const table = new Int32Array(256);

  for (let byte = 0; byte < table.length; byte++) {
    let value = byte;

    // run the byte through eight single-bit shift/xor steps
    for (let bit = 0; bit < 8; bit++) {
      const shifted = value >>> 1;
      value = (value & 1) !== 0 ? shifted ^ 0xedb88320 : shifted;
    }

    table[byte] = value;
  }

  return table;
})();
67
/**
 * feeds `chunk` through the table-driven crc-32 and returns the inverted register
 *
 * note the asymmetry: `crc` is the raw (non-inverted) register, while the return value has
 * every bit inverted. to keep checksumming across several chunks, invert a previous return
 * value before passing it back in.
 *
 * @param chunk bytes to process
 * @param crc raw register to start from; defaults to all ones
 * @returns the bitwise-inverted register as a signed 32-bit integer
 */
function crc32(chunk: Uint8Array, crc: number = 0xffffffff): number {
  const end = chunk.length;

  let register = crc;
  let pos = 0;

  while (pos < end) {
    const slot = (register ^ chunk[pos]) & 0xff;

    register = CRC32_TABLE[slot] ^ (register >>> 8);
    pos++;
  }

  return ~register;
}
75
/**
 * converts a unix timestamp into the packed ms-dos time/date pair stored in zip headers
 *
 * dos timestamps have two-second resolution and a 7-bit year counted from 1980, so only
 * the years 1980 through 2107 fit. anything outside that window, or a timestamp that does
 * not produce a valid date, is clamped to the nearest representable instant instead of
 * being allowed to wrap around into an unrelated date.
 *
 * @param unixTimestamp seconds since the unix epoch
 * @returns the packed 16-bit `time` and `date` fields
 */
function unixToDosTime(unixTimestamp: number): { time: number; date: number } {
  const date = new Date(unixTimestamp * 1000);

  // NOTE(review): assumes getYear yields the full calendar year, as the `- 1980` below implies
  const year = getYear(date);

  // written as a negated comparison so that NaN (invalid date) is caught as well
  if (!(year >= 1980)) {
    // 1980-01-01 00:00:00
    return { time: 0, date: (1 << 5) | 1 };
  }

  if (year > 2107) {
    // 2107-12-31 23:59:58
    return { time: (23 << 11) | (59 << 5) | 29, date: (127 << 9) | (12 << 5) | 31 };
  }

  // bits 0-4: seconds / 2, bits 5-10: minutes, bits 11-15: hours
  const dosTime = ((getSeconds(date) >> 1) & 0x1f) | ((getMinutes(date) & 0x3f) << 5) |
    ((getHours(date) & 0x1f) << 11);

  // bits 0-4: day, bits 5-8: month (1-based), bits 9-15: years since 1980
  const dosDate = (getDayOfMonth(date) & 0x1f) |
    (((getMonth(date) + 1) & 0x0f) << 5) |
    (((year - 1980) & 0x7f) << 9);

  return { time: dosTime, date: dosDate };
}
88
/**
 * rejects filenames that cannot be stored safely in a zip archive
 *
 * @param filename entry path to check
 * @throws {Error} if the name is empty, longer than 65535 bytes once encoded, contains a
 * `..` path segment, starts with `/`, or contains a character matched by
 * `INVALID_FILENAME_CHARS`
 */
function validateFilename(filename: string): void {
  const maxBytes = 65535;

  if (filename.length === 0) {
    throw new Error(`invalid filename: cannot be empty`);
  }

  // the limit applies to the encoded name, not to the number of utf-16 code units. one code
  // unit encodes to between one and three bytes, so the string length alone settles the
  // answer for very long and for short names; only the range in between needs encoding.
  if (
    filename.length > maxBytes ||
    (filename.length * 3 > maxBytes && textEncoder.encode(filename).length > maxBytes)
  ) {
    throw new Error(`invalid filename: too long (max 65535 bytes)`);
  }

  if (INVALID_FILENAME_TRAVERSAL.test(filename)) {
    throw new Error(`invalid filename: contains path traversal`);
  }

  if (filename.startsWith('/')) {
    throw new Error(`invalid filename: is an absolute path`);
  }

  if (INVALID_FILENAME_CHARS.test(filename)) {
    throw new Error('invalid filename: contains invalid characters');
  }
}
110
/**
 * tells whether `filename` has at least one character outside the ascii range
 * @param filename name to inspect
 * @returns `true` when a non-ascii character is present
 */
function isNonAscii(filename: string): boolean {
  const firstNonAscii = filename.search(NON_ASCII_CHARS);

  return firstNonAscii !== -1;
}
115
/** largest value a 16-bit zip field can hold */
const MAX_UINT16 = 0xffff;
/** largest value a 32-bit zip field can hold */
const MAX_UINT32 = 0xffffffff;

/** fields shared by an entry's local file header and its central directory record */
interface EntryHeaderFields {
  /** encoded filename bytes */
  fname: Uint8Array;
  /** general purpose bit flag */
  flags: number;
  /** compression method (0 = stored, 8 = deflate) */
  method: number;
  /** last modification time in dos format */
  dosTime: number;
  /** last modification date in dos format */
  dosDate: number;
}

/** everything the central directory records about a finished entry */
interface CentralRecordFields extends EntryHeaderFields {
  /** crc-32 of the uncompressed data */
  crc: number;
  /** number of bytes written for the entry's data */
  compressedSize: number;
  /** number of bytes of the original data */
  uncompressedSize: number;
  /** external file attributes */
  externalAttrs: number;
  /** archive offset of the entry's local file header */
  startOffset: number;
}

/** builds the local file header that precedes an entry's data */
function buildLocalHeader({ fname, flags, method, dosTime, dosDate }: EntryHeaderFields): Uint8Array {
  const header = new Uint8Array(30 + fname.length);
  const view = new DataView(header.buffer);

  writeUint32LE(view, 0, 0x04034b50); // local file header signature
  writeUint16LE(view, 4, 20); // version needed to extract (2.0)
  writeUint16LE(view, 6, flags); // general purpose bit flag
  writeUint16LE(view, 8, method); // compression method (0=stored, 8=deflate)
  writeUint16LE(view, 10, dosTime); // last mod file time (DOS format)
  writeUint16LE(view, 12, dosDate); // last mod file date (DOS format)
  writeUint32LE(view, 14, 0); // crc-32 (set to 0, actual value in data descriptor)
  writeUint32LE(view, 18, 0); // compressed size (set to 0, actual value in data descriptor)
  writeUint32LE(view, 22, 0); // uncompressed size (set to 0, actual value in data descriptor)
  writeUint16LE(view, 26, fname.length); // file name length
  writeUint16LE(view, 28, 0); // extra field length

  header.set(fname, 30);

  return header;
}

/** builds the data descriptor that follows an entry's data */
function buildDataDescriptor(crc: number, compressedSize: number, uncompressedSize: number): Uint8Array {
  const descriptor = new Uint8Array(16);
  const view = new DataView(descriptor.buffer);

  writeUint32LE(view, 0, 0x08074b50); // data descriptor signature
  writeUint32LE(view, 4, crc); // CRC32 of uncompressed data
  writeUint32LE(view, 8, compressedSize); // compressed size
  writeUint32LE(view, 12, uncompressedSize); // uncompressed size

  return descriptor;
}

/** builds the central directory record describing one finished entry */
function buildCentralRecord(fields: CentralRecordFields): Uint8Array {
  const { fname } = fields;

  const record = new Uint8Array(46 + fname.length);
  const view = new DataView(record.buffer);

  writeUint32LE(view, 0, 0x02014b50); // central directory record signature
  writeUint16LE(view, 4, (3 << 8) | 20); // version made by (upper byte 3 = unix, lower byte 20 = 2.0)
  writeUint16LE(view, 6, 20); // minimum required version for extraction
  writeUint16LE(view, 8, fields.flags); // general purpose bitflag
  writeUint16LE(view, 10, fields.method); // compression method
  writeUint16LE(view, 12, fields.dosTime); // file last modification time
  writeUint16LE(view, 14, fields.dosDate); // file last modification date
  writeUint32LE(view, 16, fields.crc); // CRC32 of uncompressed data
  writeUint32LE(view, 20, fields.compressedSize); // compressed size
  writeUint32LE(view, 24, fields.uncompressedSize); // uncompressed size
  writeUint16LE(view, 28, fname.length); // file name length
  writeUint16LE(view, 30, 0); // extra fields length
  writeUint16LE(view, 32, 0); // file comment length
  writeUint16LE(view, 34, 0); // disk number containing start of file
  writeUint16LE(view, 36, 0); // internal file attributes
  writeUint32LE(view, 38, fields.externalAttrs); // external file attributes
  writeUint32LE(view, 42, fields.startOffset); // offset to start of entry

  record.set(fname, 46);

  return record;
}

/** builds the end of central directory record that terminates the archive */
function buildEndOfCentralDirectory(recordCount: number, centralSize: number, centralOffset: number): Uint8Array {
  const directory = new Uint8Array(22);
  const view = new DataView(directory.buffer);

  writeUint32LE(view, 0, 0x06054b50); // end of central directory signature
  writeUint16LE(view, 4, 0); // this disk's number
  writeUint16LE(view, 6, 0); // disk number containing start of central directory
  writeUint16LE(view, 8, recordCount); // amount of records in this disk's central directory
  writeUint16LE(view, 10, recordCount); // total amount of central directory records
  writeUint32LE(view, 12, centralSize); // total size of this disk's central directory records
  writeUint32LE(view, 16, centralOffset); // offset of this disk's central directory records
  writeUint16LE(view, 20, 0); // comment length

  return directory;
}

/**
 * creates a zip archive from entries and yields chunks as Uint8Array
 *
 * every entry is written as local file header, data, data descriptor; the sizes and crc are
 * only known once the data has been consumed, which is why they live in the descriptor. the
 * central directory and the end record follow after the last entry. of the entry's `attrs`,
 * only `mtime` and `mode` are written.
 *
 * zip64 is not implemented, so anything that does not fit the classic 16/32-bit fields is
 * rejected instead of silently producing a corrupt archive.
 *
 * @param entries iterable of zip entries to include in the archive
 * @returns async generator that yields zip file chunks
 * @throws {Error} if a filename is invalid, or if a size, an offset or the number of entries
 * exceeds what the classic zip format can represent
 */
export async function* zip(
  entries: Iterable<ZipEntry> | AsyncIterable<ZipEntry>,
): AsyncGenerator<Uint8Array> {
  const listing: Uint8Array[] = [];
  let offset = 0;

  for await (const { filename, data, compress = 'deflate', attrs = DEFAULT_ATTRS } of entries) {
    validateFilename(filename);

    const fname = textEncoder.encode(filename);
    if (fname.length > MAX_UINT16) {
      throw new Error(`invalid filename: too long (max 65535 bytes)`);
    }

    const startOffset = offset;
    if (startOffset > MAX_UINT32) {
      throw new Error(`archive too large: entry offset exceeds 32 bits (zip64 is not supported)`);
    }

    const mtimeSeconds = attrs?.mtime ?? Math.floor(Date.now() / 1000);
    const { time: dosTime, date: dosDate } = unixToDosTime(mtimeSeconds);

    const header: EntryHeaderFields = {
      fname,
      // 0x0008: crc and sizes follow the data in a data descriptor
      // 0x0800: set only when the filename contains non-ascii characters
      flags: 0x0008 | (isNonAscii(filename) ? 0x0800 : 0),
      method: compress === 'deflate' ? 8 : 0,
      dosTime,
      dosDate,
    };

    // finalized crc-32 of the bytes seen so far; the crc of zero bytes is 0
    let crc = 0;
    let uncompressedSize = 0;
    let compressedSize = 0;

    const track = (chunk: Uint8Array): void => {
      uncompressedSize += chunk.length;
      // crc32() takes the raw register and returns it inverted, so the previous result has
      // to be inverted back before it can be continued with the next chunk
      crc = crc32(chunk, crc ^ -1);
    };

    // pass-through stream that records size and crc of the uncompressed data
    const trackingStream = () =>
      new TransformStream({
        transform(chunk, controller) {
          track(chunk);
          controller.enqueue(chunk);
        },
      });

    // local header
    {
      const localHeader = buildLocalHeader(header);

      offset += localHeader.length;

      yield localHeader;
    }

    // data
    if (compress === 'deflate') {
      let stream: ReadableStream<Uint8Array>;
      if (data instanceof ReadableStream) {
        stream = data.pipeThrough(trackingStream());
      } else {
        const chunk = typeof data === 'string' ? textEncoder.encode(data) : data;

        track(chunk);

        stream = new ReadableStream({
          start(controller) {
            controller.enqueue(chunk);
            controller.close();
          },
        });
      }

      // @ts-expect-error: the typings reject this pipeline - NOTE(review): pin down the exact mismatch
      yield* stream.pipeThrough(new CompressionStream('deflate-raw')).pipeThrough(
        new TransformStream({
          transform(chunk, controller) {
            controller.enqueue(chunk);

            compressedSize += chunk.length;
          },
        }),
      );
    } else if (data instanceof ReadableStream) {
      yield* data.pipeThrough(trackingStream());

      compressedSize = uncompressedSize;
    } else {
      const chunk = typeof data === 'string' ? textEncoder.encode(data) : data;

      track(chunk);
      compressedSize = uncompressedSize;

      yield chunk;
    }

    if (compressedSize > MAX_UINT32 || uncompressedSize > MAX_UINT32) {
      throw new Error(`entry too large: size exceeds 32 bits (zip64 is not supported)`);
    }

    offset += compressedSize;

    // data descriptor
    {
      const descriptor = buildDataDescriptor(crc, compressedSize, uncompressedSize);

      offset += descriptor.length;

      yield descriptor;
    }

    // central directory record, kept back until every entry has been written
    {
      const mode = attrs?.mode ?? 0o100644;

      listing.push(buildCentralRecord({
        ...header,
        crc,
        compressedSize,
        uncompressedSize,
        // the mode bits occupy the upper 16 bits of the external attributes
        externalAttrs: (mode & 0xffff) << 16,
        startOffset,
      }));
    }
  }

  // central directory
  {
    const startCentralOffset = offset;
    const recordCount = listing.length;
    const centralSize = listing.reduce((total, record) => total + record.length, 0);

    if (recordCount > MAX_UINT16) {
      throw new Error(`archive too large: more than 65535 entries (zip64 is not supported)`);
    }

    if (startCentralOffset > MAX_UINT32 || centralSize > MAX_UINT32) {
      throw new Error(`archive too large: central directory exceeds 32 bits (zip64 is not supported)`);
    }

    yield* listing;

    yield buildEndOfCentralDirectory(recordCount, centralSize, startCentralOffset);
  }
}