this repo has no description
1#!/usr/bin/env python3
2# Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
3# WARNING: This is a temporary copy of code from the cpython library to
4# facilitate bringup. Please file a task for anything you change!
5# flake8: noqa
6# fmt: off
7"""
8Read and write ZIP files.
9
10XXX references to utf-8 need further investigation.
11"""
12import binascii
13import contextlib
14import functools
15import importlib.util
16import io
17import itertools
18import os
19import posixpath
20import shutil
21import stat
22import struct
23import sys
24import threading
25import time
26
27try:
28 import zlib # We may need its compression method
29 crc32 = zlib.crc32
30except ImportError:
31 zlib = None
32 crc32 = binascii.crc32
33
34try:
35 import bz2 # We may need its compression method
36except ImportError:
37 bz2 = None
38
39try:
40 import lzma # We may need its compression method
41except ImportError:
42 lzma = None
43
# Public API exported by "from zipfile import *".
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
           "Path"]
48
class BadZipFile(Exception):
    """Raised when a file is not a valid ZIP archive or its records are corrupt."""
    pass
51
52
class LargeZipFile(Exception):
    """
    Raised when a zipfile being written requires ZIP64 extensions
    and those extensions are disabled (allowZip64=False).
    """
58
error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names


# Limits beyond which the ZIP64 extensions are required.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "version needed to extract" values written into the headers.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Indices into the unpacked end-of-central-directory tuple.
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

# Signature of the optional "data descriptor" record that follows file data
# when bit 3 of the general-purpose flags is set.
_DD_SIGNATURE = 0x08074b50

# TODO(T65337126): Uncomment next line
# _EXTRA_FIELD_STRUCT = struct.Struct('<HH')
177
178def _strip_extra(extra, xids):
179 # TODO(T65337126): Remove next line
180 _EXTRA_FIELD_STRUCT = struct.Struct('<HH')
181 # Remove Extra Fields with specified IDs.
182 unpack = _EXTRA_FIELD_STRUCT.unpack
183 modified = False
184 buffer = []
185 start = i = 0
186 while i + 4 <= len(extra):
187 xid, xlen = unpack(extra[i : i + 4])
188 j = i + 4 + xlen
189 if xid in xids:
190 if i != start:
191 buffer.append(extra[start : i])
192 start = j
193 modified = True
194 i = j
195 if not modified:
196 return extra
197 return b''.join(buffer)
198
def _check_zipfile(fp):
    """Return True if *fp* contains a valid end-of-central-directory record."""
    try:
        endrec = _EndRecData(fp)
    except OSError:
        return False
    # _EndRecData returns a non-empty list on success, None otherwise.
    return bool(endrec)
206
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    """
    try:
        if hasattr(filename, "read"):
            # Already an open file-like object.
            return _check_zipfile(fp=filename)
        with open(filename, "rb") as fp:
            return _check_zipfile(fp)
    except OSError:
        # Unreadable/nonexistent paths are simply "not a zipfile".
        return False
222
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin   -- seekable input file.
    offset -- negative offset (relative to end of file) of the regular
              end-of-central-directory record.
    endrec -- mutable list of EOCD fields; updated in place and returned.
    Returns endrec unchanged when no valid ZIP64 records are present.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
264
265
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.
    Returns None if no valid record can be located."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        # File too small to hold even an EOCD record.
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
        data[0:4] == stringEndArchive and
        data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec=list(endrec)

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment. Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long. It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
325
326
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create a member record.

        filename -- name the member will have inside the archive.
        date_time -- 6-tuple (year, month, day, hour, min, sec); the ZIP
        format cannot represent years before 1980, so those raise ValueError.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte. Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        # Validate the timestamp before populating any state on the instance.
        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0      # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3      # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file (local) header as a bytes object.

        zip64 -- force (True) or forbid (False) the ZIP64 extra record;
        None selects it automatically based on the recorded sizes.
        Raises LargeZipFile if ZIP64 is needed but forbidden.
        """
        dt = self.date_time
        # Pack the timestamp into the MS-DOS date/time fields (2 s resolution).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits); sets the UTF-8 flag (0x800)
        when the name does not fit in ASCII."""
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Parse self.extra, applying any ZIP64 (id 0x0001) field to this record.

        Raises BadZipFile when a field's declared size is inconsistent with
        the data, or a needed 64-bit value is absent.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                # The ZIP64 field holds up to three 8-byte values, present only
                # for whichever 32-bit header fields overflowed (0xffffffff).
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. File size not found."
                        )
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. Compress size not found."
                        )
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. Header offset not found."
                        )
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).

        When strict_timestamps is False, modification times outside the
        representable 1980-2107 range are clamped instead of failing later.
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        # endswith() is robust against an empty filename (the old
        # self.filename[-1] indexing raised IndexError).
        return self.filename.endswith('/')
561
562
563# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
564# internal keys. We noticed that a direct implementation is faster than
565# relying on binascii.crc32().
566
567_crctable = None
568def _gen_crc(crc):
569 for j in range(8):
570 if crc & 1:
571 crc = (crc >> 1) ^ 0xEDB88320
572 else:
573 crc >>= 1
574 return crc
575
576# ZIP supports a password-based form of encryption. Even though known
577# plaintext attacks have been found against it, it is still useful
578# to be able to get data out of such a file.
579#
580# Usage:
581# zd = _ZipDecrypter(mypwd)
582# plain_bytes = zd(cypher_bytes)
583
def _ZipDecrypter(pwd):
    """Return a closure that decrypts bytes using ZIP's legacy PKWARE cipher.

    pwd -- the password as a bytes object.
    """
    global _crctable
    if _crctable is None:
        _crctable = [_gen_crc(n) for n in range(256)]
    table = _crctable

    # Cipher state: three rolling 32-bit keys seeded with fixed constants.
    key0 = 305419896
    key1 = 591751049
    key2 = 878082192

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ table[(crc ^ ch) & 0xFF]

    def update_keys(c):
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
        key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    # Prime the keys with the password bytes.
    for ch in pwd:
        update_keys(ch)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        emit = plain.append
        for byte in data:
            k = key2 | 2
            byte ^= ((k * (k ^ 1)) >> 8) & 0xFF
            update_keys(byte)
            emit(byte)
        return bytes(plain)

    return decrypter
620
621
class LZMACompressor:
    """Compressor producing the raw-LZMA1 payload used by ZIP method 14.

    The first bytes emitted are a small property header (version and the
    encoded LZMA1 filter properties) that must precede the raw stream.
    """

    def __init__(self):
        self._comp = None

    def _init(self):
        # Build the real compressor lazily and return the property header.
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filters = [lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)]
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=filters)
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        # _init() must run before the first use of self._comp.
        header = self._init() if self._comp is None else b''
        return header + self._comp.compress(data)

    def flush(self):
        header = self._init() if self._comp is None else b''
        return header + self._comp.flush()
643
644
class LZMADecompressor:
    """Decompressor for the raw-LZMA1 payload used by ZIP method 14.

    Buffers input until the property header (4 bytes plus the encoded
    filter properties) has fully arrived, then streams the raw data.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        if self._decomp is None:
            self._unconsumed += data
            # Need the 4-byte header prefix before the property length is known.
            if len(self._unconsumed) <= 4:
                return b''
            psize, = struct.unpack('<H', self._unconsumed[2:4])
            # Wait until the whole property blob (plus at least one payload
            # byte) is available.
            if len(self._unconsumed) <= 4 + psize:
                return b''

            filters = [lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, self._unconsumed[4:4 + psize])]
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=filters)
            data = self._unconsumed[4 + psize:]
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
671
672
# Map of compression-method number -> human-readable name.  Used by
# ZipInfo.__repr__ / ZipExtFile.__repr__ and by _get_decompressor's error
# messages; most of these methods are recognized but not supported.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
692
def _check_compression(compression):
    """Raise if *compression* is unsupported or its backing module is absent.

    RuntimeError -- the method is known but the needed module failed to import.
    NotImplementedError -- the method is not supported by this module.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
    else:
        raise NotImplementedError("That compression method is not supported")
710
711
def _get_compressor(compress_type, compresslevel=None):
    """Return a fresh compressor object for *compress_type*.

    Returns None for ZIP_STORED (and unknown types, matching historic
    behavior); compresslevel is ignored for ZIP_LZMA.
    """
    if compress_type == ZIP_DEFLATED:
        level = (zlib.Z_DEFAULT_COMPRESSION if compresslevel is None
                 else compresslevel)
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()
    return None
726
727
def _get_decompressor(compress_type):
    """Return a fresh decompressor for *compress_type* (None for ZIP_STORED).

    Raises NotImplementedError for recognized-but-unsupported methods.
    """
    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()
    # _check_compression already rejects anything else, but keep a clear
    # message in case that changes.
    descr = compressor_names.get(compress_type)
    if descr:
        raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
    raise NotImplementedError("compression type %d" % (compress_type,))
744
745
746class _SharedFile:
747 def __init__(self, file, pos, close, lock, writing):
748 self._file = file
749 self._pos = pos
750 self._close = close
751 self._lock = lock
752 self._writing = writing
753 self.seekable = file.seekable
754 self.tell = file.tell
755
756 def seek(self, offset, whence=0):
757 with self._lock:
758 if self._writing():
759 raise ValueError("Can't reposition in the ZIP file while "
760 "there is an open writing handle on it. "
761 "Close the writing handle before trying to read.")
762 self._file.seek(offset, whence)
763 self._pos = self._file.tell()
764 return self._pos
765
766 def read(self, n=-1):
767 with self._lock:
768 if self._writing():
769 raise ValueError("Can't read from the ZIP file while there "
770 "is an open writing handle on it. "
771 "Close the writing handle before trying to read.")
772 self._file.seek(self._pos)
773 data = self._file.read(n)
774 self._pos = self._file.tell()
775 return data
776
777 def close(self):
778 if self._file is not None:
779 fileobj = self._file
780 self._file = None
781 self._close(fileobj)
782
783# Provide the tell method for unseekable stream
784class _Tellable:
785 def __init__(self, fp):
786 self.fp = fp
787 self.offset = 0
788
789 def write(self, data):
790 n = self.fp.write(data)
791 self.offset += n
792 return n
793
794 def tell(self):
795 return self.offset
796
797 def flush(self):
798 self.fp.flush()
799
800 def close(self):
801 self.fp.close()
802
803
804class ZipExtFile(io.BufferedIOBase):
805 """File-like object for reading an archive member.
806 Is returned by ZipFile.open().
807 """
808
809 # Max size supported by decompressor.
810 MAX_N = 1 << 31 - 1
811
812 # Read from compressed files in 4k blocks.
813 MIN_READ_SIZE = 4096
814
815 # Chunk size to read during seek
816 MAX_SEEK_READ = 1 << 24
817
818 def __init__(self, fileobj, mode, zipinfo, pwd=None,
819 close_fileobj=False):
820 self._fileobj = fileobj
821 self._pwd = pwd
822 self._close_fileobj = close_fileobj
823
824 self._compress_type = zipinfo.compress_type
825 self._compress_left = zipinfo.compress_size
826 self._left = zipinfo.file_size
827
828 self._decompressor = _get_decompressor(self._compress_type)
829
830 self._eof = False
831 self._readbuffer = b''
832 self._offset = 0
833
834 self.newlines = None
835
836 self.mode = mode
837 self.name = zipinfo.filename
838
839 if hasattr(zipinfo, 'CRC'):
840 self._expected_crc = zipinfo.CRC
841 self._running_crc = crc32(b'')
842 else:
843 self._expected_crc = None
844
845 self._seekable = False
846 try:
847 if fileobj.seekable():
848 self._orig_compress_start = fileobj.tell()
849 self._orig_compress_size = zipinfo.compress_size
850 self._orig_file_size = zipinfo.file_size
851 self._orig_start_crc = self._running_crc
852 self._seekable = True
853 except AttributeError:
854 pass
855
856 self._decrypter = None
857 if pwd:
858 if zipinfo.flag_bits & 0x8:
859 # compare against the file type from extended local headers
860 check_byte = (zipinfo._raw_time >> 8) & 0xff
861 else:
862 # compare against the CRC otherwise
863 check_byte = (zipinfo.CRC >> 24) & 0xff
864 h = self._init_decrypter()
865 if h != check_byte:
866 raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)
867
868
869 def _init_decrypter(self):
870 self._decrypter = _ZipDecrypter(self._pwd)
871 # The first 12 bytes in the cypher stream is an encryption header
872 # used to strengthen the algorithm. The first 11 bytes are
873 # completely random, while the 12th contains the MSB of the CRC,
874 # or the MSB of the file time depending on the header type
875 # and is used to check the correctness of the password.
876 header = self._fileobj.read(12)
877 self._compress_left -= 12
878 return self._decrypter(header)[11]
879
880 def __repr__(self):
881 result = ['<%s.%s' % (self.__class__.__module__,
882 self.__class__.__qualname__)]
883 if not self.closed:
884 result.append(' name=%r mode=%r' % (self.name, self.mode))
885 if self._compress_type != ZIP_STORED:
886 result.append(' compress_type=%s' %
887 compressor_names.get(self._compress_type,
888 self._compress_type))
889 else:
890 result.append(' [closed]')
891 result.append('>')
892 return ''.join(result)
893
894 def readline(self, limit=-1):
895 """Read and return a line from the stream.
896
897 If limit is specified, at most limit bytes will be read.
898 """
899
900 if limit < 0:
901 # Shortcut common case - newline found in buffer.
902 i = self._readbuffer.find(b'\n', self._offset) + 1
903 if i > 0:
904 line = self._readbuffer[self._offset: i]
905 self._offset = i
906 return line
907
908 return io.BufferedIOBase.readline(self, limit)
909
910 def peek(self, n=1):
911 """Returns buffered bytes without advancing the position."""
912 if n > len(self._readbuffer) - self._offset:
913 chunk = self.read(n)
914 if len(chunk) > self._offset:
915 self._readbuffer = chunk + self._readbuffer[self._offset:]
916 self._offset = 0
917 else:
918 self._offset -= len(chunk)
919
920 # Return up to 512 bytes to reduce allocation overhead for tight loops.
921 return self._readbuffer[self._offset: self._offset + 512]
922
923 def readable(self):
924 return True
925
926 def read(self, n=-1):
927 """Read and return up to n bytes.
928 If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
929 """
930 if n is None or n < 0:
931 buf = self._readbuffer[self._offset:]
932 self._readbuffer = b''
933 self._offset = 0
934 while not self._eof:
935 buf += self._read1(self.MAX_N)
936 return buf
937
938 end = n + self._offset
939 if end < len(self._readbuffer):
940 buf = self._readbuffer[self._offset:end]
941 self._offset = end
942 return buf
943
944 n = end - len(self._readbuffer)
945 buf = self._readbuffer[self._offset:]
946 self._readbuffer = b''
947 self._offset = 0
948 while n > 0 and not self._eof:
949 data = self._read1(n)
950 if n < len(data):
951 self._readbuffer = data
952 self._offset = n
953 buf += data[:n]
954 break
955 buf += data
956 n -= len(data)
957 return buf
958
959 def _update_crc(self, newdata):
960 # Update the CRC using the given data.
961 if self._expected_crc is None:
962 # No need to compute the CRC if we don't have a reference value
963 return
964 self._running_crc = crc32(newdata, self._running_crc)
965 # Check the CRC if we're at the end of the file
966 if self._eof and self._running_crc != self._expected_crc:
967 raise BadZipFile("Bad CRC-32 for file %r" % self.name)
968
969 def read1(self, n):
970 """Read up to n bytes with at most one read() system call."""
971
972 if n is None or n < 0:
973 buf = self._readbuffer[self._offset:]
974 self._readbuffer = b''
975 self._offset = 0
976 while not self._eof:
977 data = self._read1(self.MAX_N)
978 if data:
979 buf += data
980 break
981 return buf
982
983 end = n + self._offset
984 if end < len(self._readbuffer):
985 buf = self._readbuffer[self._offset:end]
986 self._offset = end
987 return buf
988
989 n = end - len(self._readbuffer)
990 buf = self._readbuffer[self._offset:]
991 self._readbuffer = b''
992 self._offset = 0
993 if n > 0:
994 while not self._eof:
995 data = self._read1(n)
996 if n < len(data):
997 self._readbuffer = data
998 self._offset = n
999 buf += data[:n]
1000 break
1001 if data:
1002 buf += data
1003 break
1004 return buf
1005
1006 def _read1(self, n):
1007 # Read up to n compressed bytes with at most one read() system call,
1008 # decrypt and decompress them.
1009 if self._eof or n <= 0:
1010 return b''
1011
1012 # Read from file.
1013 if self._compress_type == ZIP_DEFLATED:
1014 ## Handle unconsumed data.
1015 data = self._decompressor.unconsumed_tail
1016 if n > len(data):
1017 data += self._read2(n - len(data))
1018 else:
1019 data = self._read2(n)
1020
1021 if self._compress_type == ZIP_STORED:
1022 self._eof = self._compress_left <= 0
1023 elif self._compress_type == ZIP_DEFLATED:
1024 n = max(n, self.MIN_READ_SIZE)
1025 data = self._decompressor.decompress(data, n)
1026 self._eof = (self._decompressor.eof or
1027 self._compress_left <= 0 and
1028 not self._decompressor.unconsumed_tail)
1029 if self._eof:
1030 data += self._decompressor.flush()
1031 else:
1032 data = self._decompressor.decompress(data)
1033 self._eof = self._decompressor.eof or self._compress_left <= 0
1034
1035 data = data[:self._left]
1036 self._left -= len(data)
1037 if self._left <= 0:
1038 self._eof = True
1039 self._update_crc(data)
1040 return data
1041
1042 def _read2(self, n):
1043 if self._compress_left <= 0:
1044 return b''
1045
1046 n = max(n, self.MIN_READ_SIZE)
1047 n = min(n, self._compress_left)
1048
1049 data = self._fileobj.read(n)
1050 self._compress_left -= len(data)
1051 if not data:
1052 raise EOFError
1053
1054 if self._decrypter is not None:
1055 data = self._decrypter(data)
1056 return data
1057
1058 def close(self):
1059 try:
1060 if self._close_fileobj:
1061 self._fileobj.close()
1062 finally:
1063 super().close()
1064
1065 def seekable(self):
1066 return self._seekable
1067
    def seek(self, offset, whence=0):
        """Seek to *offset* (in uncompressed bytes) relative to *whence*.

        Backward seeks restart decompression from the beginning of the
        member; forward seeks are realized by reading and discarding data.
        Returns the new absolute position.
        """
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        # Clamp the target position into [0, uncompressed size].
        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            # to the start of the member and re-read forward from there.
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        # Consume forward in bounded chunks until the target is reached.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()
1115
1116 def tell(self):
1117 if not self._seekable:
1118 raise io.UnsupportedOperation("underlying stream is not seekable")
1119 filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
1120 return filepos
1121
1122
class _ZipWriteFile(io.BufferedIOBase):
    """Writable stream returned by ZipFile.open(..., mode='w').

    Compresses data as it is written, maintains CRC/size bookkeeping, and
    on close() either rewrites the member's local header with the final
    sizes or (for non-seekable archives, flag bit 3) appends a data
    descriptor after the file data.
    """

    def __init__(self, zf, zinfo, zip64):
        self._zinfo = zinfo        # ZipInfo record being written
        self._zip64 = zip64        # whether ZIP64 header fields are in use
        self._zipfile = zf         # owning ZipFile
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0        # uncompressed bytes written so far
        self._compress_size = 0    # compressed bytes written so far
        self._crc = 0              # running CRC-32 of uncompressed data

    @property
    def _fileobj(self):
        # Always go through the owning ZipFile so a replaced/wrapped fp
        # (e.g. _Tellable) is picked up.
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Write *data* (any buffer-protocol object) to the archive member.

        Returns the number of uncompressed bytes consumed.
        """
        if self.closed:
            raise ValueError('I/O operation on closed file.')

        # Accept any object supporting the buffer protocol.  len() counts
        # elements, not bytes, so it under-counts for memoryviews whose
        # itemsize != 1; use memoryview.nbytes for the byte count instead.
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes

        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        """Finish the member: flush the compressor and fix up header data."""
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data (data
                # descriptor) because the stream cannot seek backwards.
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'File size unexpectedly exceeded ZIP64 limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'Compressed size unexpectedly exceeded ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            self._zipfile._writing = False
1198
1199
1200
class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
                compresslevel=None)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read 'r', write 'w', exclusive create 'x',
          or append 'a'.
    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
                 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.
    compresslevel: None (default for the given compression type) or an integer
                   specifying the level to pass to the compressor.
                   When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
                   When using ZIP_DEFLATED integers 0 through 9 are accepted.
                   When using ZIP_BZIP2 integers 1 through 9 are accepted.

    """

    fp = None  # Set here since __del__ checks it
    # Lazily-built str.translate table used by _sanitize_windows_name().
    _windows_illegal_name_trans_table = None

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None, *, strict_timestamps=True):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'."""
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''
        self._strict_timestamps = strict_timestamps

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            # On OSError, fall back along the modeDict chain (e.g. mode 'a'
            # tries 'r+b', then 'w+b' when the file does not yet exist).
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    if filemode in modeDict:
                        filemode = modeDict[filemode]
                        continue
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        # Number of open handles sharing self.fp; see _fpclose().
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                self.start_dir = self.fp.tell()
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            # On any failure, release the file before re-raising so the
            # half-constructed object does not leak an open handle.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if self.fp is not None:
            if self._filePassed:
                result.append(' file=%r' % self.fp)
            elif self.filename is not None:
                result.append(' filename=%r' % self.filename)
            result.append(' mode=%r' % self.mode)
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file."""
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset", offset_cd, inferred, concat)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        # Read the whole central directory into memory and parse it from
        # a BytesIO to avoid many small reads on the real file object.
        data = fp.read(size_cd)
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            flags = centdir[5]
            if flags & 0x800:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
                x.flag_bits, x.compress_type, t, d,
                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            # Local header offsets are relative to the start of the zip
            # data, so shift them by any leading (concatenated) bytes.
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)


    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self, file=None):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
              file=file)
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
                  file=file)

    def testzip(self):
        """Read all the files and check the CRC."""
        chunk_size = 2 ** 20
        for zinfo in self.filelist:
            try:
                # Read by chunks, to avoid an OverflowError or a
                # MemoryError with very large embedded files.
                with self.open(zinfo.filename, "r") as f:
                    while f.read(chunk_size):     # Check CRC-32
                        pass
            except BadZipFile:
                return zinfo.filename
        # Implicitly returns None when every member passes its CRC check.

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'."""
        info = self.NameToInfo.get(name)
        if info is None:
            raise KeyError(
                'There is no item named %r in the archive' % name)

        return info

    def setpassword(self, pwd):
        """Set default password for encrypted files."""
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd:
            self.pwd = pwd
        else:
            self.pwd = None

    @property
    def comment(self):
        """The comment text associated with the ZIP file."""
        return self._comment

    @comment.setter
    def comment(self, comment):
        if not isinstance(comment, bytes):
            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
        # check for valid comment length
        if len(comment) > ZIP_MAX_COMMENT:
            import warnings
            warnings.warn('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT, stacklevel=2)
            comment = comment[:ZIP_MAX_COMMENT]
        self._comment = comment
        self._didModify = True

    def read(self, name, pwd=None):
        """Return file bytes for name."""
        with self.open(name, "r", pwd) as fp:
            return fp.read()

    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
        """Return file-like object for 'name'.

        name is a string for the file name within the ZIP file, or a ZipInfo
        object.

        mode should be 'r' to read a file already in the ZIP file, or 'w' to
        write to a file newly added to the archive.

        pwd is the password to decrypt files (only used for reading).

        When writing, if the file size is not known in advance but may exceed
        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
        files.  If the size is known in advance, it is best to pass a ZipInfo
        instance for name, with zinfo.file_size set.
        """
        if mode not in {"r", "w"}:
            raise ValueError('open() requires mode "r" or "w"')
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd and (mode == "w"):
            raise ValueError("pwd is only supported for reading files")
        if not self.fp:
            raise ValueError(
                "Attempt to use ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        elif mode == 'w':
            zinfo = ZipInfo(name)
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        if mode == 'w':
            return self._open_to_write(zinfo, force_zip64=force_zip64)

        if self._writing:
            raise ValueError("Can't read from the ZIP file while there "
                    "is an open writing handle on it. "
                    "Close the writing handle before trying to read.")

        # Open for reading:
        self._fileRefCnt += 1
        zef_file = _SharedFile(self.fp, zinfo.header_offset,
                               self._fpclose, self._lock, lambda: self._writing)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x20:
                # Zip 2.7: compressed patched data
                raise NotImplementedError("compressed patched data (flag bit 5)")

            if zinfo.flag_bits & 0x40:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            # The local header's name must agree with the central directory;
            # a mismatch indicates a corrupt (or hostile) archive.
            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            if is_encrypted:
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %r is encrypted, password "
                                       "required for extraction" % name)
            else:
                pwd = None

            return ZipExtFile(zef_file, mode, zinfo, pwd, True)
        except:
            zef_file.close()
            raise

    def _open_to_write(self, zinfo, force_zip64=False):
        """Return a _ZipWriteFile that streams *zinfo*'s data into the archive."""
        if force_zip64 and not self._allowZip64:
            raise ValueError(
                "force_zip64 is True, but allowZip64 was False when opening "
                "the ZIP file."
            )
        if self._writing:
            raise ValueError("Can't write to the ZIP file while there is "
                             "another write handle open on it. "
                             "Close the first handle before opening another.")

        # Sizes and CRC are overwritten with correct data after processing the file
        if not hasattr(zinfo, 'file_size'):
            zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0

        zinfo.flag_bits = 0x00
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02
        if not self._seekable:
            # Cannot patch the header afterwards; use a data descriptor.
            zinfo.flag_bits |= 0x08

        if not zinfo.external_attr:
            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()

        self._writecheck(zinfo)
        self._didModify = True

        self.fp.write(zinfo.FileHeader(zip64))

        self._writing = True
        return _ZipWriteFile(self, zinfo, zip64)

    def extract(self, member, path=None, pwd=None):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a ZipInfo object. You can
           specify a different directory using `path'.
        """
        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)

        return self._extract_member(member, path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all members from the archive to the current working
           directory. `path' specifies a different directory to extract to.
           `members' is optional and must be a subset of the list returned
           by namelist().
        """
        if members is None:
            members = self.namelist()

        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)

        for zipinfo in members:
            self._extract_member(zipinfo, path, pwd)

    @classmethod
    def _sanitize_windows_name(cls, arcname, pathsep):
        """Replace bad characters and remove trailing dots from parts."""
        table = cls._windows_illegal_name_trans_table
        if not table:
            illegal = ':<>|"?*'
            table = str.maketrans(illegal, '_' * len(illegal))
            cls._windows_illegal_name_trans_table = table
        arcname = arcname.translate(table)
        # remove trailing dots
        arcname = (x.rstrip('.') for x in arcname.split(pathsep))
        # rejoin, removing empty parts.
        arcname = pathsep.join(x for x in arcname if x)
        return arcname

    def _extract_member(self, member, targetpath, pwd):
        """Extract the ZipInfo object 'member' to a physical
           file on the path targetpath.
        """
        if not isinstance(member, ZipInfo):
            member = self.getinfo(member)

        # build the destination pathname, replacing
        # forward slashes to platform specific separators.
        arcname = member.filename.replace('/', os.path.sep)

        if os.path.altsep:
            arcname = arcname.replace(os.path.altsep, os.path.sep)
        # interpret absolute pathname as relative, remove drive letter or
        # UNC path, redundant separators, "." and ".." components.
        arcname = os.path.splitdrive(arcname)[1]
        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                                   if x not in invalid_path_parts)
        if os.path.sep == '\\':
            # filter illegal characters on Windows
            arcname = self._sanitize_windows_name(arcname, os.path.sep)

        targetpath = os.path.join(targetpath, arcname)
        targetpath = os.path.normpath(targetpath)

        # Create all upper directories if necessary.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            os.makedirs(upperdirs)

        if member.is_dir():
            if not os.path.isdir(targetpath):
                os.mkdir(targetpath)
            return targetpath

        with self.open(member, pwd=pwd) as source, \
             open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)

        return targetpath

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive."""
        if zinfo.filename in self.NameToInfo:
            import warnings
            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
        if self.mode not in ('w', 'x', 'a'):
            raise ValueError("write() requires mode 'w', 'x', or 'a'")
        if not self.fp:
            raise ValueError(
                "Attempt to write ZIP archive that was already closed")
        _check_compression(zinfo.compress_type)
        if not self._allowZip64:
            requires_zip64 = None
            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
                requires_zip64 = "Files count"
            elif zinfo.file_size > ZIP64_LIMIT:
                requires_zip64 = "Filesize"
            elif zinfo.header_offset > ZIP64_LIMIT:
                requires_zip64 = "Zipfile size"
            if requires_zip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")

    def write(self, filename, arcname=None,
              compress_type=None, compresslevel=None):
        """Put the bytes from filename into the archive under the name
        arcname."""
        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists"
            )

        zinfo = ZipInfo.from_file(filename, arcname,
                                  strict_timestamps=self._strict_timestamps)

        if zinfo.is_dir():
            zinfo.compress_size = 0
            zinfo.CRC = 0
        else:
            if compress_type is not None:
                zinfo.compress_type = compress_type
            else:
                zinfo.compress_type = self.compression

            if compresslevel is not None:
                zinfo._compresslevel = compresslevel
            else:
                zinfo._compresslevel = self.compresslevel

        if zinfo.is_dir():
            # Directory entries carry no data: write the header directly
            # instead of going through an open('w') handle.
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                zinfo.header_offset = self.fp.tell()  # Start of header bytes
                if zinfo.compress_type == ZIP_LZMA:
                # Compressed data includes an end-of-stream (EOS) marker
                    zinfo.flag_bits |= 0x02

                self._writecheck(zinfo)
                self._didModify = True

                self.filelist.append(zinfo)
                self.NameToInfo[zinfo.filename] = zinfo
                self.fp.write(zinfo.FileHeader(False))
                self.start_dir = self.fp.tell()
        else:
            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
                shutil.copyfileobj(src, dest, 1024*8)

    def writestr(self, zinfo_or_arcname, data,
                 compress_type=None, compresslevel=None):
        """Write a file into the archive.  The contents is 'data', which
        may be either a 'str' or a 'bytes' instance; if it is a 'str',
        it is encoded as UTF-8 first.
        'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive."""
        if isinstance(data, str):
            data = data.encode("utf-8")
        if not isinstance(zinfo_or_arcname, ZipInfo):
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
            if zinfo.filename[-1] == '/':
                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
                zinfo.external_attr |= 0x10           # MS-DOS directory flag
            else:
                zinfo.external_attr = 0o600 << 16     # ?rw-------
        else:
            zinfo = zinfo_or_arcname

        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists."
            )

        if compress_type is not None:
            zinfo.compress_type = compress_type

        if compresslevel is not None:
            zinfo._compresslevel = compresslevel

        zinfo.file_size = len(data)            # Uncompressed size
        with self._lock:
            with self.open(zinfo, mode='w') as dest:
                dest.write(data)

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode 'w', 'x' and 'a' write the ending
        records."""
        if self.fp is None:
            return

        if self._writing:
            raise ValueError("Can't close the ZIP file while there is "
                             "an open writing handle on it. "
                             "Close the writing handle before closing the zip.")

        try:
            if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
                with self._lock:
                    if self._seekable:
                        self.fp.seek(self.start_dir)
                    self._write_end_record()
        finally:
            fp = self.fp
            self.fp = None
            self._fpclose(fp)

    def _write_end_record(self):
        """Write the central directory and end-of-archive record(s)."""
        for zinfo in self.filelist:         # write central directory
            dt = zinfo.date_time
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            extra = []
            if zinfo.file_size > ZIP64_LIMIT \
               or zinfo.compress_size > ZIP64_LIMIT:
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                # Oversized fields are stored in the ZIP64 extra record and
                # flagged here with the 0xffffffff sentinel.
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffff
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            min_version = 0
            if extra:
                # Append a ZIP64 field to the extra's
                extra_data = _strip_extra(extra_data, (1,))
                extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                min_version = ZIP64_VERSION

            if zinfo.compress_type == ZIP_BZIP2:
                min_version = max(BZIP2_VERSION, min_version)
            elif zinfo.compress_type == ZIP_LZMA:
                min_version = max(LZMA_VERSION, min_version)

            extract_version = max(min_version, zinfo.extract_version)
            create_version = max(min_version, zinfo.create_version)
            try:
                filename, flag_bits = zinfo._encodeFilenameFlags()
                centdir = struct.pack(structCentralDir,
                                      stringCentralDir, create_version,
                                      zinfo.create_system, extract_version, zinfo.reserved,
                                      flag_bits, zinfo.compress_type, dostime, dosdate,
                                      zinfo.CRC, compress_size, file_size,
                                      len(filename), len(extra_data), len(zinfo.comment),
                                      0, zinfo.internal_attr, zinfo.external_attr,
                                      header_offset)
            except DeprecationWarning:
                print((structCentralDir, stringCentralDir, create_version,
                       zinfo.create_system, extract_version, zinfo.reserved,
                       zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                       zinfo.CRC, compress_size, file_size,
                       len(zinfo.filename), len(extra_data), len(zinfo.comment),
                       0, zinfo.internal_attr, zinfo.external_attr,
                       header_offset), file=sys.stderr)
                raise
            self.fp.write(centdir)
            self.fp.write(filename)
            self.fp.write(extra_data)
            self.fp.write(zinfo.comment)

        pos2 = self.fp.tell()
        # Write end-of-zip-archive record
        centDirCount = len(self.filelist)
        centDirSize = pos2 - self.start_dir
        centDirOffset = self.start_dir
        requires_zip64 = None
        if centDirCount > ZIP_FILECOUNT_LIMIT:
            requires_zip64 = "Files count"
        elif centDirOffset > ZIP64_LIMIT:
            requires_zip64 = "Central directory offset"
        elif centDirSize > ZIP64_LIMIT:
            requires_zip64 = "Central directory size"
        if requires_zip64:
            # Need to write the ZIP64 end-of-archive records
            if not self._allowZip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")
            zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
            self.fp.write(zip64endrec)

            zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
            self.fp.write(zip64locrec)
            # The classic end record still follows, with its fields capped
            # at their maximum values.
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)

        endrec = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, centDirCount, centDirCount,
                             centDirSize, centDirOffset, len(self._comment))
        self.fp.write(endrec)
        self.fp.write(self._comment)
        self.fp.flush()

    def _fpclose(self, fp):
        """Drop one reference to the shared file; close it when unused
        (and only if this ZipFile opened it itself)."""
        assert self._fileRefCnt > 0
        self._fileRefCnt -= 1
        if not self._fileRefCnt and not self._filePassed:
            fp.close()
1961
1962
1963class PyZipFile(ZipFile):
1964 """Class to create ZIP archives with Python library files and packages."""
1965
    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open a PyZipFile; same arguments as ZipFile plus *optimize*."""
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        # Bytecode optimization level for modules added by writepy();
        # presumably -1 means "use the interpreter's current level" as in
        # py_compile — the consumer of _optimize is outside this view, confirm.
        self._optimize = optimize
1971
1972 def writepy(self, pathname, basename="", filterfunc=None):
1973 """Add all files from "pathname" to the ZIP archive.
1974
1975 If pathname is a package directory, search the directory and
1976 all package subdirectories recursively for all *.py and enter
1977 the modules into the archive. If pathname is a plain
1978 directory, listdir *.py and enter all modules. Else, pathname
1979 must be a Python *.py file and the module will be put into the
1980 archive. Added modules are always module.pyc.
1981 This method will compile the module.py into module.pyc if
1982 necessary.
1983 If filterfunc(pathname) is given, it is called with every argument.
1984 When it is False, the file or directory is skipped.
1985 """
1986 pathname = os.fspath(pathname)
1987 if filterfunc and not filterfunc(pathname):
1988 if self.debug:
1989 label = 'path' if os.path.isdir(pathname) else 'file'
1990 print('%s %r skipped by filterfunc' % (label, pathname))
1991 return
1992 dir, name = os.path.split(pathname)
1993 if os.path.isdir(pathname):
1994 initname = os.path.join(pathname, "__init__.py")
1995 if os.path.isfile(initname):
1996 # This is a package directory, add it
1997 if basename:
1998 basename = "%s/%s" % (basename, name)
1999 else:
2000 basename = name
2001 if self.debug:
2002 print("Adding package in", pathname, "as", basename)
2003 fname, arcname = self._get_codename(initname[0:-3], basename)
2004 if self.debug:
2005 print("Adding", arcname)
2006 self.write(fname, arcname)
2007 dirlist = sorted(os.listdir(pathname))
2008 dirlist.remove("__init__.py")
2009 # Add all *.py files and package subdirectories
2010 for filename in dirlist:
2011 path = os.path.join(pathname, filename)
2012 root, ext = os.path.splitext(filename)
2013 if os.path.isdir(path):
2014 if os.path.isfile(os.path.join(path, "__init__.py")):
2015 # This is a package directory, add it
2016 self.writepy(path, basename,
2017 filterfunc=filterfunc) # Recursive call
2018 elif ext == ".py":
2019 if filterfunc and not filterfunc(path):
2020 if self.debug:
2021 print('file %r skipped by filterfunc' % path)
2022 continue
2023 fname, arcname = self._get_codename(path[0:-3],
2024 basename)
2025 if self.debug:
2026 print("Adding", arcname)
2027 self.write(fname, arcname)
2028 else:
2029 # This is NOT a package directory, add its files at top level
2030 if self.debug:
2031 print("Adding files from directory", pathname)
2032 for filename in sorted(os.listdir(pathname)):
2033 path = os.path.join(pathname, filename)
2034 root, ext = os.path.splitext(filename)
2035 if ext == ".py":
2036 if filterfunc and not filterfunc(path):
2037 if self.debug:
2038 print('file %r skipped by filterfunc' % path)
2039 continue
2040 fname, arcname = self._get_codename(path[0:-3],
2041 basename)
2042 if self.debug:
2043 print("Adding", arcname)
2044 self.write(fname, arcname)
2045 else:
2046 if pathname[-3:] != ".py":
2047 raise RuntimeError(
2048 'Files added with writepy() must end with ".py"')
2049 fname, arcname = self._get_codename(pathname[0:-3], basename)
2050 if self.debug:
2051 print("Adding file", arcname)
2052 self.write(fname, arcname)
2053
2054 def _get_codename(self, pathname, basename):
2055 """Return (filename, archivename) for the path.
2056
2057 Given a module name path, return the correct file path and
2058 archive name, compiling if necessary. For example, given
2059 /python/lib/string, return (/python/lib/string.pyc, string).
2060 """
2061 def _compile(file, optimize=-1):
2062 import py_compile
2063 if self.debug:
2064 print("Compiling", file)
2065 try:
2066 py_compile.compile(file, doraise=True, optimize=optimize)
2067 except py_compile.PyCompileError as err:
2068 print(err.msg)
2069 return False
2070 return True
2071
2072 file_py = pathname + ".py"
2073 file_pyc = pathname + ".pyc"
2074 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2075 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2076 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
2077 if self._optimize == -1:
2078 # legacy mode: use whatever file is present
2079 if (os.path.isfile(file_pyc) and
2080 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2081 # Use .pyc file.
2082 arcname = fname = file_pyc
2083 elif (os.path.isfile(pycache_opt0) and
2084 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
2085 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2086 # file name in the archive.
2087 fname = pycache_opt0
2088 arcname = file_pyc
2089 elif (os.path.isfile(pycache_opt1) and
2090 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2091 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2092 # file name in the archive.
2093 fname = pycache_opt1
2094 arcname = file_pyc
2095 elif (os.path.isfile(pycache_opt2) and
2096 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2097 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2098 # file name in the archive.
2099 fname = pycache_opt2
2100 arcname = file_pyc
2101 else:
2102 # Compile py into PEP 3147 pyc file.
2103 if _compile(file_py):
2104 if sys.flags.optimize == 0:
2105 fname = pycache_opt0
2106 elif sys.flags.optimize == 1:
2107 fname = pycache_opt1
2108 else:
2109 fname = pycache_opt2
2110 arcname = file_pyc
2111 else:
2112 fname = arcname = file_py
2113 else:
2114 # new mode: use given optimization level
2115 if self._optimize == 0:
2116 fname = pycache_opt0
2117 arcname = file_pyc
2118 else:
2119 arcname = file_pyc
2120 if self._optimize == 1:
2121 fname = pycache_opt1
2122 elif self._optimize == 2:
2123 fname = pycache_opt2
2124 else:
2125 msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2126 raise ValueError(msg)
2127 if not (os.path.isfile(fname) and
2128 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2129 if not _compile(file_py, optimize=self._optimize):
2130 fname = arcname = file_py
2131 archivename = os.path.split(arcname)[1]
2132 if basename:
2133 archivename = "%s/%s" % (basename, archivename)
2134 return (fname, archivename)
2135
2136
2137def _parents(path):
2138 """
2139 Given a path with elements separated by
2140 posixpath.sep, generate all parents of that path.
2141
2142 >>> list(_parents('b/d'))
2143 ['b']
2144 >>> list(_parents('/b/d/'))
2145 ['/b']
2146 >>> list(_parents('b/d/f/'))
2147 ['b/d', 'b']
2148 >>> list(_parents('b'))
2149 []
2150 >>> list(_parents(''))
2151 []
2152 """
2153 return itertools.islice(_ancestry(path), 1, None)
2154
2155
2156def _ancestry(path):
2157 """
2158 Given a path with elements separated by
2159 posixpath.sep, generate all elements of that path
2160
2161 >>> list(_ancestry('b/d'))
2162 ['b/d', 'b']
2163 >>> list(_ancestry('/b/d/'))
2164 ['/b/d', '/b']
2165 >>> list(_ancestry('b/d/f/'))
2166 ['b/d/f', 'b/d', 'b']
2167 >>> list(_ancestry('b'))
2168 ['b']
2169 >>> list(_ancestry(''))
2170 []
2171 """
2172 path = path.rstrip(posixpath.sep)
2173 while path and path != posixpath.sep:
2174 yield path
2175 path, tail = posixpath.split(path)
2176
2177
# dict preserves insertion order (guaranteed since Python 3.7), so
# dict.fromkeys(iterable) yields each item exactly once, in first-seen
# order, when iterated.
_dedupe = dict.fromkeys
"""Deduplicate an iterable in original order"""
2180
2181
2182def _difference(minuend, subtrahend):
2183 """
2184 Return items in minuend not in subtrahend, retaining order
2185 with O(1) lookup.
2186 """
2187 return itertools.filterfalse(set(subtrahend).__contains__, minuend)
2188
2189
class CompleteDirs(ZipFile):
    """
    A ZipFile subclass that ensures that implied directories
    are always included in the namelist.
    """

    @staticmethod
    def _implied_dirs(names):
        # Collect every ancestor of every member, render each as a
        # directory entry (trailing slash), keep only those not already
        # present, and deduplicate in first-seen order.
        ancestors = itertools.chain.from_iterable(
            _parents(name) for name in names)
        dir_names = (ancestor + posixpath.sep for ancestor in ancestors)
        return _dedupe(_difference(dir_names, names))

    def namelist(self):
        # Explicit entries first, then any directories they imply.
        explicit = super().namelist()
        return explicit + list(self._implied_dirs(explicit))

    def _name_set(self):
        return set(self.namelist())

    def resolve_dir(self, name):
        """
        If the name represents a directory, return that name
        as a directory (with the trailing slash).
        """
        known = self._name_set()
        as_dir = name + '/'
        if name not in known and as_dir in known:
            return as_dir
        return name

    @classmethod
    def make(cls, source):
        """
        Given a source (filename or zipfile), return an
        appropriate CompleteDirs subclass.
        """
        if isinstance(source, CompleteDirs):
            return source

        if not isinstance(source, ZipFile):
            return cls(source)

        # Only allow for FastLookup when supplied zipfile is read-only
        if 'r' not in source.mode:
            cls = CompleteDirs

        # Re-dress the existing ZipFile as cls without re-reading it.
        res = cls.__new__(cls)
        vars(res).update(vars(source))
        return res
2238
2239
class FastLookup(CompleteDirs):
    """
    ZipFile subclass to ensure implicit
    dirs exist and are resolved rapidly.
    """

    def namelist(self):
        # Compute the name list once and cache it on the instance.
        try:
            return self.__names
        except AttributeError:
            self.__names = super().namelist()
            return self.__names

    def _name_set(self):
        # Compute the name set once and cache it on the instance.
        try:
            return self.__lookup
        except AttributeError:
            self.__lookup = super()._name_set()
            return self.__lookup
2256
2257
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    # repr template (name-mangled to _Path__repr).
    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        # root may be a filename, an open ZipFile, or an existing
        # CompleteDirs instance; FastLookup.make normalizes it.
        self.root = FastLookup.make(root)
        self.at = at

    @property
    def open(self):
        """A callable that opens this member within the archive."""
        return functools.partial(self.root.open, self.at)

    @property
    def name(self):
        """Final path component, without any trailing slash."""
        trimmed = self.at.rstrip("/")
        return posixpath.basename(trimmed)

    def read_text(self, *args, **kwargs):
        """Read the member as text; extra args go to TextIOWrapper."""
        with self.open() as strm:
            wrapper = io.TextIOWrapper(strm, *args, **kwargs)
            return wrapper.read()

    def read_bytes(self):
        """Read the member's raw bytes."""
        with self.open() as strm:
            return strm.read()

    def _is_child(self, path):
        # A path is our child when its parent directory equals us.
        own = self.at.rstrip("/")
        return posixpath.dirname(path.at.rstrip("/")) == own

    def _next(self, at):
        return Path(self.root, at)

    def is_dir(self):
        # The archive root ("") and any name ending in "/" are dirs.
        return not self.at or self.at.endswith("/")

    def is_file(self):
        return not self.is_dir()

    def exists(self):
        return self.at in self.root._name_set()

    def iterdir(self):
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        candidates = (self._next(name) for name in self.root.namelist())
        return (child for child in candidates if self._is_child(child))

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, add):
        combined = posixpath.join(self.at, add)
        return self._next(self.root.resolve_dir(combined))

    __truediv__ = joinpath

    @property
    def parent(self):
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)
2384
2385
def main(args=None):
    """Command-line entry point: list, extract, create or test a zip archive."""
    import argparse

    description = 'A simple command-line interface for zipfile module.'
    parser = argparse.ArgumentParser(description=description)
    # Exactly one action must be chosen.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    group.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    group.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    args = parser.parse_args(args)

    if args.test is not None:
        with ZipFile(args.test, 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args.list is not None:
        with ZipFile(args.list, 'r') as zf:
            zf.printdir()

    elif args.extract is not None:
        archive, destination = args.extract
        with ZipFile(archive, 'r') as zf:
            zf.extractall(destination)

    elif args.create is not None:
        zip_name, *files = args.create

        def addToZip(zf, path, zippath):
            # Files are stored deflated; directories recurse, writing an
            # entry for every non-root directory. Other node types
            # (sockets, fifos, ...) are silently skipped.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                if zippath:
                    zf.write(path, zippath)
                for nm in sorted(os.listdir(path)):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(zip_name, 'w') as zf:
            for path in files:
                # Derive the archive name from the basename, tolerating a
                # trailing separator; '.' and '..' map to the archive root.
                zippath = os.path.basename(path)
                if not zippath:
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                addToZip(zf, path, zippath)
2445
2446
if __name__ == "__main__":  # Run the CLI when executed directly as a script.
    main()