NOTE: Vendored snapshot of CPython's zipfile module (trunk revision; original file is 2448 lines / 88 kB). See the header warning below before modifying.
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
# WARNING: This is a temporary copy of code from the cpython library to
# facilitate bringup. Please file a task for anything you change!
# flake8: noqa
# fmt: off
"""
Read and write ZIP files.

XXX references to utf-8 need further investigation.
"""
import binascii
import contextlib
import functools
import importlib.util
import io
import itertools
import os
import posixpath
import shutil
import stat
import struct
import sys
import threading
import time

try:
    import zlib  # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    crc32 = binascii.crc32

try:
    import bz2  # We may need its compression method
except ImportError:
    bz2 = None

try:
    import lzma  # We may need its compression method
except ImportError:
    lzma = None

__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
           "Path"]


class BadZipFile(Exception):
    """Raised when a file is not a valid ZIP archive, or is corrupted."""
    pass


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


error = BadZipfile = BadZipFile  # Pre-3.2 compatibility names


# Limits above which the ZIP64 extensions are required.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "version needed to extract" values for the features this module can write.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

_DD_SIGNATURE = 0x08074b50

# TODO(T65337126): Uncomment next line
# _EXTRA_FIELD_STRUCT = struct.Struct('<HH')

def _strip_extra(extra, xids):
    """Remove every extra-field record whose header ID is in *xids*.

    Returns *extra* unchanged (the same object) when no record matched.
    """
    # TODO(T65337126): Remove next line
    _EXTRA_FIELD_STRUCT = struct.Struct('<HH')
    # Remove Extra Fields with specified IDs.
    unpack = _EXTRA_FIELD_STRUCT.unpack
    modified = False
    buffer = []
    start = i = 0
    while i + 4 <= len(extra):
        xid, xlen = unpack(extra[i : i + 4])
        j = i + 4 + xlen
        if xid in xids:
            if i != start:
                buffer.append(extra[start : i])
            start = j
            modified = True
        i = j
    if not modified:
        return extra
    # BUG FIX: the previous implementation silently dropped any data that
    # followed the last stripped record; preserve the unmatched tail.
    if start < len(extra):
        buffer.append(extra[start:])
    return b''.join(buffer)

def _check_zipfile(fp):
    """Return True if the open binary file *fp* ends with a valid
    "end of central directory" record."""
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except OSError:
        pass
    return False

def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except OSError:
        pass
    return result

def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec


def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec = list(endrec)

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980, 1, 1, 0, 0, 0)):
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED  # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0      # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3      # System which created ZIP archive
        self.create_version = DEFAULT_VERSION   # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION  # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object."""
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Encode the filename as ASCII when possible, falling back to
        UTF-8 with the language-encoding flag (0x800) set."""
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        # Try to decode the extra field (notably the ZIP64 record, ID 0x0001,
        # which carries the 64-bit sizes/offset when the 32-bit header fields
        # are saturated).
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. File size not found."
                        )
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. Compress size not found."
                        )
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. Header offset not found."
                        )
                    # (removed unused local 'old' that captured the stale offset)
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # With strict_timestamps=False, clamp out-of-range mtimes into the
        # representable DOS date range instead of raising in __init__.
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        # Robustness fix: endswith() also handles an empty filename, where
        # indexing filename[-1] raised IndexError.
        return self.filename.endswith('/')


# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
# internal keys. We noticed that a direct implementation is faster than
# relying on binascii.crc32().

_crctable = None
def _gen_crc(crc):
    """Generate one entry of the CRC-32 table (polynomial 0xEDB88320)."""
    for j in range(8):
        if crc & 1:
            crc = (crc >> 1) ^ 0xEDB88320
        else:
            crc >>= 1
    return crc

# ZIP supports a password-based form of encryption. Even though known
# plaintext attacks have been found against it, it is still useful
# to be able to get data out of such a file.
#
# Usage:
#     zd = _ZipDecrypter(mypwd)
#     plain_bytes = zd(cypher_bytes)

def _ZipDecrypter(pwd):
    """Return a callable that decrypts bytes using the legacy PKZIP cipher
    keyed by *pwd* (a bytes password)."""
    key0 = 305419896
    key1 = 591751049
    key2 = 878082192

    global _crctable
    if _crctable is None:
        _crctable = list(map(_gen_crc, range(256)))
    crctable = _crctable

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]

    def update_keys(c):
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
        key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    for p in pwd:
        update_keys(p)

    def decrypter(data):
        """Decrypt a bytes object."""
        result = bytearray()
        append = result.append
        for c in data:
            k = key2 | 2
            c ^= ((k * (k ^ 1)) >> 8) & 0xFF
            update_keys(c)
            append(c)
        return bytes(result)

    return decrypter


class LZMACompressor:
    """Compressor producing the raw-LZMA1 payload used by ZIP method 14.

    The first compress()/flush() call prepends the 4-byte property header
    (version 9.4 plus the encoded filter properties)."""

    def __init__(self):
        self._comp = None

    def _init(self):
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
            lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        ])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        if self._comp is None:
            return self._init() + self._comp.compress(data)
        return self._comp.compress(data)

    def flush(self):
        if self._comp is None:
            return self._init() + self._comp.flush()
        return self._comp.flush()


class LZMADecompressor:
    """Decompressor for ZIP method 14: buffers input until the 4-byte
    property header is complete, then streams raw LZMA1 data."""

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        if self._decomp is None:
            self._unconsumed += data
            if len(self._unconsumed) <= 4:
                return b''
            psize, = struct.unpack('<H', self._unconsumed[2:4])
            if len(self._unconsumed) <= 4 + psize:
                return b''

            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
                lzma._decode_filter_properties(lzma.FILTER_LZMA1,
                                               self._unconsumed[4:4 + psize])
            ])
            data = self._unconsumed[4 + psize:]
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result


compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}

def _check_compression(compression):
    """Raise if *compression* is unsupported or its module is missing."""
    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
    else:
        raise NotImplementedError("That compression method is not supported")


def _get_compressor(compress_type, compresslevel=None):
    """Return a fresh compressor object for *compress_type*, or None for
    ZIP_STORED (no compression)."""
    if compress_type == ZIP_DEFLATED:
        if compresslevel is not None:
            return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
    elif compress_type == ZIP_BZIP2:
        if compresslevel is not None:
            return bz2.BZ2Compressor(compresslevel)
        return bz2.BZ2Compressor()
    # compresslevel is ignored for ZIP_LZMA
    elif compress_type == ZIP_LZMA:
        return LZMACompressor()
    else:
        return None


def _get_decompressor(compress_type):
    """Return a fresh decompressor object for *compress_type*, or None for
    ZIP_STORED."""
    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    elif compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    elif compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    elif compress_type == ZIP_LZMA:
        return LZMADecompressor()
    else:
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
        else:
            raise NotImplementedError("compression type %d" % (compress_type,))
class _SharedFile:
    """Wrapper that lets several readers share one underlying file object,
    each keeping its own position, serialized by a shared lock."""

    def __init__(self, file, pos, close, lock, writing):
        self._file = file
        self._pos = pos
        self._close = close      # callable invoked with the file on close()
        self._lock = lock
        self._writing = writing  # callable: True while a write handle is open
        self.seekable = file.seekable
        self.tell = file.tell

    def seek(self, offset, whence=0):
        with self._lock:
            if self._writing():
                raise ValueError("Can't reposition in the ZIP file while "
                                 "there is an open writing handle on it. "
                                 "Close the writing handle before trying to read.")
            self._file.seek(offset, whence)
            self._pos = self._file.tell()
            return self._pos

    def read(self, n=-1):
        with self._lock:
            if self._writing():
                raise ValueError("Can't read from the ZIP file while there "
                                 "is an open writing handle on it. "
                                 "Close the writing handle before trying to read.")
            # Restore this reader's private position before reading.
            self._file.seek(self._pos)
            data = self._file.read(n)
            self._pos = self._file.tell()
            return data

    def close(self):
        if self._file is not None:
            fileobj = self._file
            self._file = None
            self._close(fileobj)

# Provide the tell method for unseekable stream
class _Tellable:
    def __init__(self, fp):
        self.fp = fp
        self.offset = 0

    def write(self, data):
        n = self.fp.write(data)
        self.offset += n
        return n

    def tell(self):
        return self.offset

    def flush(self):
        self.fp.flush()

    def close(self):
        self.fp.close()


class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # NOTE(review): binds as 1 << (31 - 1) due to operator precedence; kept
    # as upstream wrote it.
    MAX_N = 1 << 31 - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, pwd=None,
                 close_fileobj=False):
        self._fileobj = fileobj
        self._pwd = pwd
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        self._seekable = False
        try:
            if fileobj.seekable():
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass

        self._decrypter = None
        if pwd:
            if zipinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zipinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zipinfo.CRC >> 24) & 0xff
            h = self._init_decrypter()
            if h != check_byte:
                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)

    def _init_decrypter(self):
        self._decrypter = _ZipDecrypter(self._pwd)
        # The first 12 bytes in the cypher stream is an encryption header
        # used to strengthen the algorithm. The first 11 bytes are
        # completely random, while the 12th contains the MSB of the CRC,
        # or the MSB of the file time depending on the header type
        # and is used to check the correctness of the password.
        header = self._fileobj.read(12)
        self._compress_left -= 12
        return self._decrypter(header)[11]

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the member's declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        return self._seekable

    def seek(self, offset, whence=0):
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0:  # Seek from start of file
            new_pos = offset
        elif whence == 1:  # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2:  # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        # Seeking forward is implemented by reading and discarding.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos
1077 new_pos = self._orig_file_size + offset 1078 else: 1079 raise ValueError("whence must be os.SEEK_SET (0), " 1080 "os.SEEK_CUR (1), or os.SEEK_END (2)") 1081 1082 if new_pos > self._orig_file_size: 1083 new_pos = self._orig_file_size 1084 1085 if new_pos < 0: 1086 new_pos = 0 1087 1088 read_offset = new_pos - curr_pos 1089 buff_offset = read_offset + self._offset 1090 1091 if buff_offset >= 0 and buff_offset < len(self._readbuffer): 1092 # Just move the _offset index if the new position is in the _readbuffer 1093 self._offset = buff_offset 1094 read_offset = 0 1095 elif read_offset < 0: 1096 # Position is before the current position. Reset the ZipExtFile 1097 self._fileobj.seek(self._orig_compress_start) 1098 self._running_crc = self._orig_start_crc 1099 self._compress_left = self._orig_compress_size 1100 self._left = self._orig_file_size 1101 self._readbuffer = b'' 1102 self._offset = 0 1103 self._decompressor = _get_decompressor(self._compress_type) 1104 self._eof = False 1105 read_offset = new_pos 1106 if self._decrypter is not None: 1107 self._init_decrypter() 1108 1109 while read_offset > 0: 1110 read_len = min(self.MAX_SEEK_READ, read_offset) 1111 self.read(read_len) 1112 read_offset -= read_len 1113 1114 return self.tell() 1115 1116 def tell(self): 1117 if not self._seekable: 1118 raise io.UnsupportedOperation("underlying stream is not seekable") 1119 filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset 1120 return filepos 1121 1122 1123class _ZipWriteFile(io.BufferedIOBase): 1124 def __init__(self, zf, zinfo, zip64): 1125 self._zinfo = zinfo 1126 self._zip64 = zip64 1127 self._zipfile = zf 1128 self._compressor = _get_compressor(zinfo.compress_type, 1129 zinfo._compresslevel) 1130 self._file_size = 0 1131 self._compress_size = 0 1132 self._crc = 0 1133 1134 @property 1135 def _fileobj(self): 1136 return self._zipfile.fp 1137 1138 def writable(self): 1139 return True 1140 1141 def write(self, data): 1142 if self.closed: 1143 
raise ValueError('I/O operation on closed file.') 1144 nbytes = len(data) 1145 self._file_size += nbytes 1146 self._crc = crc32(data, self._crc) 1147 if self._compressor: 1148 data = self._compressor.compress(data) 1149 self._compress_size += len(data) 1150 self._fileobj.write(data) 1151 return nbytes 1152 1153 def close(self): 1154 if self.closed: 1155 return 1156 try: 1157 super().close() 1158 # Flush any data from the compressor, and update header info 1159 if self._compressor: 1160 buf = self._compressor.flush() 1161 self._compress_size += len(buf) 1162 self._fileobj.write(buf) 1163 self._zinfo.compress_size = self._compress_size 1164 else: 1165 self._zinfo.compress_size = self._file_size 1166 self._zinfo.CRC = self._crc 1167 self._zinfo.file_size = self._file_size 1168 1169 # Write updated header info 1170 if self._zinfo.flag_bits & 0x08: 1171 # Write CRC and file sizes after the file data 1172 fmt = '<LLQQ' if self._zip64 else '<LLLL' 1173 self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC, 1174 self._zinfo.compress_size, self._zinfo.file_size)) 1175 self._zipfile.start_dir = self._fileobj.tell() 1176 else: 1177 if not self._zip64: 1178 if self._file_size > ZIP64_LIMIT: 1179 raise RuntimeError( 1180 'File size unexpectedly exceeded ZIP64 limit') 1181 if self._compress_size > ZIP64_LIMIT: 1182 raise RuntimeError( 1183 'Compressed size unexpectedly exceeded ZIP64 limit') 1184 # Seek backwards and write file header (which will now include 1185 # correct CRC and file sizes) 1186 1187 # Preserve current position in file 1188 self._zipfile.start_dir = self._fileobj.tell() 1189 self._fileobj.seek(self._zinfo.header_offset) 1190 self._fileobj.write(self._zinfo.FileHeader(self._zip64)) 1191 self._fileobj.seek(self._zipfile.start_dir) 1192 1193 # Successfully written: Add file to our caches 1194 self._zipfile.filelist.append(self._zinfo) 1195 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo 1196 finally: 1197 self._zipfile._writing = False 


class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
                compresslevel=None)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read 'r', write 'w', exclusive create 'x',
          or append 'a'.
    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
                 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.
    compresslevel: None (default for the given compression type) or an integer
                   specifying the level to pass to the compressor.
                   When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
                   When using ZIP_DEFLATED integers 0 through 9 are accepted.
                   When using ZIP_BZIP2 integers 1 through 9 are accepted.

    """

    fp = None                   # Set here since __del__ checks it
    _windows_illegal_name_trans_table = None

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None, *, strict_timestamps=True):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'."""
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''
        self._strict_timestamps = strict_timestamps

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            # If opening fails, fall back through the chained entries in
            # modeDict (e.g. 'a' tries 'r+b', then 'w+b', then 'wb') until
            # one succeeds or no fallback remains.
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    if filemode in modeDict:
                        filemode = modeDict[filemode]
                        continue
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            # Close the file we opened (while keeping the refcount/ownership
            # bookkeeping straight), then re-raise the original error.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if self.fp is not None:
            if self._filePassed:
                result.append(' file=%r' % self.fp)
            elif self.filename is not None:
                result.append(' filename=%r' % self.filename)
            result.append(' mode=%r' % self.mode)
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file."""
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset", offset_cd, inferred, concat)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        # Slurp the whole central directory into memory and parse it from a
        # BytesIO so the real file position is no longer needed.
        data = fp.read(size_cd)
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            flags = centdir[5]
            if flags & 0x800:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)


    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self, file=None):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
              file=file)
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
                  file=file)

    def testzip(self):
        """Read all the files and check the CRC."""
        chunk_size = 2 ** 20
        for zinfo in self.filelist:
            try:
                # Read by chunks, to avoid an OverflowError or a
                # MemoryError with very large embedded files.
                with self.open(zinfo.filename, "r") as f:
                    while f.read(chunk_size):     # Check CRC-32
                        pass
            except BadZipFile:
                # Return the first member whose stored CRC does not match.
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'."""
        info = self.NameToInfo.get(name)
        if info is None:
            raise KeyError(
                'There is no item named %r in the archive' % name)

        return info

    def setpassword(self, pwd):
        """Set default password for encrypted files."""
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd:
            self.pwd = pwd
        else:
            self.pwd = None

    @property
    def comment(self):
        """The comment text associated with the ZIP file."""
        return self._comment

    @comment.setter
    def comment(self, comment):
        if not isinstance(comment, bytes):
            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
        # check for valid comment length
        if len(comment) > ZIP_MAX_COMMENT:
            import warnings
            warnings.warn('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT, stacklevel=2)
            comment = comment[:ZIP_MAX_COMMENT]
        self._comment = comment
        self._didModify = True

    def read(self, name, pwd=None):
        """Return file bytes for name."""
        with self.open(name, "r", pwd) as fp:
            return fp.read()

    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
        """Return file-like object for 'name'.

        name is a string for the file name within the ZIP file, or a ZipInfo
        object.

        mode should be 'r' to read a file already in the ZIP file, or 'w' to
        write to a file newly added to the archive.

        pwd is the password to decrypt files (only used for reading).

        When writing, if the file size is not known in advance but may exceed
        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
        files.  If the size is known in advance, it is best to pass a ZipInfo
        instance for name, with zinfo.file_size set.
        """
        if mode not in {"r", "w"}:
            raise ValueError('open() requires mode "r" or "w"')
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd and (mode == "w"):
            raise ValueError("pwd is only supported for reading files")
        if not self.fp:
            raise ValueError(
                "Attempt to use ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        elif mode == 'w':
            zinfo = ZipInfo(name)
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        if mode == 'w':
            return self._open_to_write(zinfo, force_zip64=force_zip64)

        if self._writing:
            raise ValueError("Can't read from the ZIP file while there "
                    "is an open writing handle on it. "
                    "Close the writing handle before trying to read.")

        # Open for reading:
        self._fileRefCnt += 1
        zef_file = _SharedFile(self.fp, zinfo.header_offset,
                               self._fpclose, self._lock, lambda: self._writing)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x20:
                # Zip 2.7: compressed patched data
                raise NotImplementedError("compressed patched data (flag bit 5)")

            if zinfo.flag_bits & 0x40:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            # Cross-check the local header against the central directory.
            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            if is_encrypted:
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %r is encrypted, password "
                                       "required for extraction" % name)
            else:
                pwd = None

            return ZipExtFile(zef_file, mode, zinfo, pwd, True)
        except:
            zef_file.close()
            raise

    def _open_to_write(self, zinfo, force_zip64=False):
        """Return a _ZipWriteFile for adding the member described by *zinfo*."""
        if force_zip64 and not self._allowZip64:
            raise ValueError(
                "force_zip64 is True, but allowZip64 was False when opening "
                "the ZIP file."
            )
        if self._writing:
            raise ValueError("Can't write to the ZIP file while there is "
                             "another write handle open on it. "
                             "Close the first handle before opening another.")

        # Sizes and CRC are overwritten with correct data after processing the file
        if not hasattr(zinfo, 'file_size'):
            zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0

        zinfo.flag_bits = 0x00
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02
        if not self._seekable:
            # Can't back-patch the header later, so announce a data descriptor.
            zinfo.flag_bits |= 0x08

        if not zinfo.external_attr:
            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()

        self._writecheck(zinfo)
        self._didModify = True

        self.fp.write(zinfo.FileHeader(zip64))

        self._writing = True
        return _ZipWriteFile(self, zinfo, zip64)

    def extract(self, member, path=None, pwd=None):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a ZipInfo object. You can
           specify a different directory using `path'.
        """
        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)

        return self._extract_member(member, path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all members from the archive to the current working
           directory. `path' specifies a different directory to extract to.
           `members' is optional and must be a subset of the list returned
           by namelist().
        """
        if members is None:
            members = self.namelist()

        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)

        for zipinfo in members:
            self._extract_member(zipinfo, path, pwd)

    @classmethod
    def _sanitize_windows_name(cls, arcname, pathsep):
        """Replace bad characters and remove trailing dots from parts."""
        # The translation table is built once and cached on the class.
        table = cls._windows_illegal_name_trans_table
        if not table:
            illegal = ':<>|"?*'
            table = str.maketrans(illegal, '_' * len(illegal))
            cls._windows_illegal_name_trans_table = table
        arcname = arcname.translate(table)
        # remove trailing dots
        arcname = (x.rstrip('.') for x in arcname.split(pathsep))
        # rejoin, removing empty parts.
        arcname = pathsep.join(x for x in arcname if x)
        return arcname

    def _extract_member(self, member, targetpath, pwd):
        """Extract the ZipInfo object 'member' to a physical
           file on the path targetpath.
        """
        if not isinstance(member, ZipInfo):
            member = self.getinfo(member)

        # build the destination pathname, replacing
        # forward slashes to platform specific separators.
        arcname = member.filename.replace('/', os.path.sep)

        if os.path.altsep:
            arcname = arcname.replace(os.path.altsep, os.path.sep)
        # interpret absolute pathname as relative, remove drive letter or
        # UNC path, redundant separators, "." and ".." components.
        arcname = os.path.splitdrive(arcname)[1]
        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                                   if x not in invalid_path_parts)
        if os.path.sep == '\\':
            # filter illegal characters on Windows
            arcname = self._sanitize_windows_name(arcname, os.path.sep)

        targetpath = os.path.join(targetpath, arcname)
        targetpath = os.path.normpath(targetpath)

        # Create all upper directories if necessary.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            os.makedirs(upperdirs)

        # Directory members have no data; just make the directory.
        if member.is_dir():
            if not os.path.isdir(targetpath):
                os.mkdir(targetpath)
            return targetpath

        with self.open(member, pwd=pwd) as source, \
             open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)

        return targetpath

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive."""
        if zinfo.filename in self.NameToInfo:
            import warnings
            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
        if self.mode not in ('w', 'x', 'a'):
            raise ValueError("write() requires mode 'w', 'x', or 'a'")
        if not self.fp:
            raise ValueError(
                "Attempt to write ZIP archive that was already closed")
        _check_compression(zinfo.compress_type)
        if not self._allowZip64:
            requires_zip64 = None
            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
                requires_zip64 = "Files count"
            elif zinfo.file_size > ZIP64_LIMIT:
                requires_zip64 = "Filesize"
            elif zinfo.header_offset > ZIP64_LIMIT:
                requires_zip64 = "Zipfile size"
            if requires_zip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")

    def write(self, filename, arcname=None,
              compress_type=None, compresslevel=None):
        """Put the bytes from filename into the archive under the name
        arcname."""
        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists"
            )

        zinfo = ZipInfo.from_file(filename, arcname,
                                  strict_timestamps=self._strict_timestamps)

        if zinfo.is_dir():
            zinfo.compress_size = 0
            zinfo.CRC = 0
        else:
            # Per-call compression settings override the archive defaults.
            if compress_type is not None:
                zinfo.compress_type = compress_type
            else:
                zinfo.compress_type = self.compression

            if compresslevel is not None:
                zinfo._compresslevel = compresslevel
            else:
                zinfo._compresslevel = self.compresslevel

        if zinfo.is_dir():
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                zinfo.header_offset = self.fp.tell()  # Start of header bytes
                if zinfo.compress_type == ZIP_LZMA:
                    # Compressed data includes an end-of-stream (EOS) marker
                    zinfo.flag_bits |= 0x02

                self._writecheck(zinfo)
                self._didModify = True

                self.filelist.append(zinfo)
                self.NameToInfo[zinfo.filename] = zinfo
                self.fp.write(zinfo.FileHeader(False))
                self.start_dir = self.fp.tell()
        else:
            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
                shutil.copyfileobj(src, dest, 1024*8)

    def writestr(self, zinfo_or_arcname, data,
                 compress_type=None, compresslevel=None):
        """Write a file into the archive.  The contents is 'data', which
        may be either a 'str' or a 'bytes' instance; if it is a 'str',
        it is encoded as UTF-8 first.
        'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive."""
        if isinstance(data, str):
            data = data.encode("utf-8")
        if not isinstance(zinfo_or_arcname, ZipInfo):
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
            if zinfo.filename[-1] == '/':
                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
                zinfo.external_attr |= 0x10           # MS-DOS directory flag
            else:
                zinfo.external_attr = 0o600 << 16     # ?rw-------
        else:
            zinfo = zinfo_or_arcname

        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists."
            )

        if compress_type is not None:
            zinfo.compress_type = compress_type

        if compresslevel is not None:
            zinfo._compresslevel = compresslevel

        zinfo.file_size = len(data)            # Uncompressed size
        with self._lock:
            with self.open(zinfo, mode='w') as dest:
                dest.write(data)

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode 'w', 'x' and 'a' write the ending
        records."""
        if self.fp is None:
            return

        if self._writing:
            raise ValueError("Can't close the ZIP file while there is "
                             "an open writing handle on it. "
                             "Close the writing handle before closing the zip.")

        try:
            if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
                with self._lock:
                    if self._seekable:
                        self.fp.seek(self.start_dir)
                    self._write_end_record()
        finally:
            fp = self.fp
            self.fp = None
            self._fpclose(fp)

    def _write_end_record(self):
        """Write the central directory and end-of-archive record(s),
        including the ZIP64 records when sizes/offsets/counts require them."""
        for zinfo in self.filelist:         # write central directory
            dt = zinfo.date_time
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            extra = []
            if zinfo.file_size > ZIP64_LIMIT \
               or zinfo.compress_size > ZIP64_LIMIT:
                # Real sizes go into the ZIP64 extra field; the fixed-width
                # header fields hold the 0xffffffff sentinel.
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffff
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            min_version = 0
            if extra:
                # Append a ZIP64 field to the extra's
                extra_data = _strip_extra(extra_data, (1,))
                extra_data = struct.pack(
                    '<HH' + 'Q'*len(extra),
                    1, 8*len(extra), *extra) + extra_data

                min_version = ZIP64_VERSION

            if zinfo.compress_type == ZIP_BZIP2:
                min_version = max(BZIP2_VERSION, min_version)
            elif zinfo.compress_type == ZIP_LZMA:
                min_version = max(LZMA_VERSION, min_version)

            extract_version = max(min_version, zinfo.extract_version)
            create_version = max(min_version, zinfo.create_version)
            try:
                filename, flag_bits = zinfo._encodeFilenameFlags()
                centdir = struct.pack(structCentralDir,
                                      stringCentralDir, create_version,
                                      zinfo.create_system, extract_version, zinfo.reserved,
                                      flag_bits, zinfo.compress_type, dostime, dosdate,
                                      zinfo.CRC, compress_size, file_size,
                                      len(filename), len(extra_data), len(zinfo.comment),
                                      0, zinfo.internal_attr, zinfo.external_attr,
                                      header_offset)
            except DeprecationWarning:
                # Dump the offending values for debugging before re-raising.
                print((structCentralDir, stringCentralDir, create_version,
                       zinfo.create_system, extract_version, zinfo.reserved,
                       zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                       zinfo.CRC, compress_size, file_size,
                       len(zinfo.filename), len(extra_data), len(zinfo.comment),
                       0, zinfo.internal_attr, zinfo.external_attr,
                       header_offset), file=sys.stderr)
                raise
            self.fp.write(centdir)
            self.fp.write(filename)
            self.fp.write(extra_data)
            self.fp.write(zinfo.comment)

        pos2 = self.fp.tell()
        # Write end-of-zip-archive record
        centDirCount = len(self.filelist)
        centDirSize = pos2 - self.start_dir
        centDirOffset = self.start_dir
        requires_zip64 = None
        if centDirCount > ZIP_FILECOUNT_LIMIT:
            requires_zip64 = "Files count"
        elif centDirOffset > ZIP64_LIMIT:
            requires_zip64 = "Central directory offset"
        elif centDirSize > ZIP64_LIMIT:
            requires_zip64 = "Central directory size"
        if requires_zip64:
            # Need to write the ZIP64 end-of-archive records
            if not self._allowZip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")
            zip64endrec = struct.pack(
                structEndArchive64, stringEndArchive64,
                44, 45, 45, 0, 0, centDirCount, centDirCount,
                centDirSize, centDirOffset)
            self.fp.write(zip64endrec)

            zip64locrec = struct.pack(
                structEndArchive64Locator,
                stringEndArchive64Locator, 0, pos2, 1)
            self.fp.write(zip64locrec)
            # Clamp the classic end record's fields to their sentinels.
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)

        endrec = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, centDirCount, centDirCount,
                             centDirSize, centDirOffset, len(self._comment))
        self.fp.write(endrec)
        self.fp.write(self._comment)
        self.fp.flush()

    def _fpclose(self, fp):
        """Drop one reference to the shared file object; really close it
        once the last reference is gone (and we own the file)."""
        assert self._fileRefCnt > 0
        self._fileRefCnt -= 1
        if not self._fileRefCnt and not self._filePassed:
            fp.close()


class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.
        If filterfunc(pathname) is given, it is called with every argument.
        When it is False, the file or directory is skipped.
1985 """ 1986 pathname = os.fspath(pathname) 1987 if filterfunc and not filterfunc(pathname): 1988 if self.debug: 1989 label = 'path' if os.path.isdir(pathname) else 'file' 1990 print('%s %r skipped by filterfunc' % (label, pathname)) 1991 return 1992 dir, name = os.path.split(pathname) 1993 if os.path.isdir(pathname): 1994 initname = os.path.join(pathname, "__init__.py") 1995 if os.path.isfile(initname): 1996 # This is a package directory, add it 1997 if basename: 1998 basename = "%s/%s" % (basename, name) 1999 else: 2000 basename = name 2001 if self.debug: 2002 print("Adding package in", pathname, "as", basename) 2003 fname, arcname = self._get_codename(initname[0:-3], basename) 2004 if self.debug: 2005 print("Adding", arcname) 2006 self.write(fname, arcname) 2007 dirlist = sorted(os.listdir(pathname)) 2008 dirlist.remove("__init__.py") 2009 # Add all *.py files and package subdirectories 2010 for filename in dirlist: 2011 path = os.path.join(pathname, filename) 2012 root, ext = os.path.splitext(filename) 2013 if os.path.isdir(path): 2014 if os.path.isfile(os.path.join(path, "__init__.py")): 2015 # This is a package directory, add it 2016 self.writepy(path, basename, 2017 filterfunc=filterfunc) # Recursive call 2018 elif ext == ".py": 2019 if filterfunc and not filterfunc(path): 2020 if self.debug: 2021 print('file %r skipped by filterfunc' % path) 2022 continue 2023 fname, arcname = self._get_codename(path[0:-3], 2024 basename) 2025 if self.debug: 2026 print("Adding", arcname) 2027 self.write(fname, arcname) 2028 else: 2029 # This is NOT a package directory, add its files at top level 2030 if self.debug: 2031 print("Adding files from directory", pathname) 2032 for filename in sorted(os.listdir(pathname)): 2033 path = os.path.join(pathname, filename) 2034 root, ext = os.path.splitext(filename) 2035 if ext == ".py": 2036 if filterfunc and not filterfunc(path): 2037 if self.debug: 2038 print('file %r skipped by filterfunc' % path) 2039 continue 2040 fname, arcname 
= self._get_codename(path[0:-3], 2041 basename) 2042 if self.debug: 2043 print("Adding", arcname) 2044 self.write(fname, arcname) 2045 else: 2046 if pathname[-3:] != ".py": 2047 raise RuntimeError( 2048 'Files added with writepy() must end with ".py"') 2049 fname, arcname = self._get_codename(pathname[0:-3], basename) 2050 if self.debug: 2051 print("Adding file", arcname) 2052 self.write(fname, arcname) 2053 2054 def _get_codename(self, pathname, basename): 2055 """Return (filename, archivename) for the path. 2056 2057 Given a module name path, return the correct file path and 2058 archive name, compiling if necessary. For example, given 2059 /python/lib/string, return (/python/lib/string.pyc, string). 2060 """ 2061 def _compile(file, optimize=-1): 2062 import py_compile 2063 if self.debug: 2064 print("Compiling", file) 2065 try: 2066 py_compile.compile(file, doraise=True, optimize=optimize) 2067 except py_compile.PyCompileError as err: 2068 print(err.msg) 2069 return False 2070 return True 2071 2072 file_py = pathname + ".py" 2073 file_pyc = pathname + ".pyc" 2074 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='') 2075 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1) 2076 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2) 2077 if self._optimize == -1: 2078 # legacy mode: use whatever file is present 2079 if (os.path.isfile(file_pyc) and 2080 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime): 2081 # Use .pyc file. 2082 arcname = fname = file_pyc 2083 elif (os.path.isfile(pycache_opt0) and 2084 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime): 2085 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2086 # file name in the archive. 
2087 fname = pycache_opt0 2088 arcname = file_pyc 2089 elif (os.path.isfile(pycache_opt1) and 2090 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime): 2091 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2092 # file name in the archive. 2093 fname = pycache_opt1 2094 arcname = file_pyc 2095 elif (os.path.isfile(pycache_opt2) and 2096 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime): 2097 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2098 # file name in the archive. 2099 fname = pycache_opt2 2100 arcname = file_pyc 2101 else: 2102 # Compile py into PEP 3147 pyc file. 2103 if _compile(file_py): 2104 if sys.flags.optimize == 0: 2105 fname = pycache_opt0 2106 elif sys.flags.optimize == 1: 2107 fname = pycache_opt1 2108 else: 2109 fname = pycache_opt2 2110 arcname = file_pyc 2111 else: 2112 fname = arcname = file_py 2113 else: 2114 # new mode: use given optimization level 2115 if self._optimize == 0: 2116 fname = pycache_opt0 2117 arcname = file_pyc 2118 else: 2119 arcname = file_pyc 2120 if self._optimize == 1: 2121 fname = pycache_opt1 2122 elif self._optimize == 2: 2123 fname = pycache_opt2 2124 else: 2125 msg = "invalid value for 'optimize': {!r}".format(self._optimize) 2126 raise ValueError(msg) 2127 if not (os.path.isfile(fname) and 2128 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime): 2129 if not _compile(file_py, optimize=self._optimize): 2130 fname = arcname = file_py 2131 archivename = os.path.split(arcname)[1] 2132 if basename: 2133 archivename = "%s/%s" % (basename, archivename) 2134 return (fname, archivename) 2135 2136 2137def _parents(path): 2138 """ 2139 Given a path with elements separated by 2140 posixpath.sep, generate all parents of that path. 
2141 2142 >>> list(_parents('b/d')) 2143 ['b'] 2144 >>> list(_parents('/b/d/')) 2145 ['/b'] 2146 >>> list(_parents('b/d/f/')) 2147 ['b/d', 'b'] 2148 >>> list(_parents('b')) 2149 [] 2150 >>> list(_parents('')) 2151 [] 2152 """ 2153 return itertools.islice(_ancestry(path), 1, None) 2154 2155 2156def _ancestry(path): 2157 """ 2158 Given a path with elements separated by 2159 posixpath.sep, generate all elements of that path 2160 2161 >>> list(_ancestry('b/d')) 2162 ['b/d', 'b'] 2163 >>> list(_ancestry('/b/d/')) 2164 ['/b/d', '/b'] 2165 >>> list(_ancestry('b/d/f/')) 2166 ['b/d/f', 'b/d', 'b'] 2167 >>> list(_ancestry('b')) 2168 ['b'] 2169 >>> list(_ancestry('')) 2170 [] 2171 """ 2172 path = path.rstrip(posixpath.sep) 2173 while path and path != posixpath.sep: 2174 yield path 2175 path, tail = posixpath.split(path) 2176 2177 2178_dedupe = dict.fromkeys 2179"""Deduplicate an iterable in original order""" 2180 2181 2182def _difference(minuend, subtrahend): 2183 """ 2184 Return items in minuend not in subtrahend, retaining order 2185 with O(1) lookup. 2186 """ 2187 return itertools.filterfalse(set(subtrahend).__contains__, minuend) 2188 2189 2190class CompleteDirs(ZipFile): 2191 """ 2192 A ZipFile subclass that ensures that implied directories 2193 are always included in the namelist. 2194 """ 2195 2196 @staticmethod 2197 def _implied_dirs(names): 2198 parents = itertools.chain.from_iterable(map(_parents, names)) 2199 as_dirs = (p + posixpath.sep for p in parents) 2200 return _dedupe(_difference(as_dirs, names)) 2201 2202 def namelist(self): 2203 names = super(CompleteDirs, self).namelist() 2204 return names + list(self._implied_dirs(names)) 2205 2206 def _name_set(self): 2207 return set(self.namelist()) 2208 2209 def resolve_dir(self, name): 2210 """ 2211 If the name represents a directory, return that name 2212 as a directory (with the trailing slash). 
2213 """ 2214 names = self._name_set() 2215 dirname = name + '/' 2216 dir_match = name not in names and dirname in names 2217 return dirname if dir_match else name 2218 2219 @classmethod 2220 def make(cls, source): 2221 """ 2222 Given a source (filename or zipfile), return an 2223 appropriate CompleteDirs subclass. 2224 """ 2225 if isinstance(source, CompleteDirs): 2226 return source 2227 2228 if not isinstance(source, ZipFile): 2229 return cls(source) 2230 2231 # Only allow for FastPath when supplied zipfile is read-only 2232 if 'r' not in source.mode: 2233 cls = CompleteDirs 2234 2235 res = cls.__new__(cls) 2236 vars(res).update(vars(source)) 2237 return res 2238 2239 2240class FastLookup(CompleteDirs): 2241 """ 2242 ZipFile subclass to ensure implicit 2243 dirs exist and are resolved rapidly. 2244 """ 2245 def namelist(self): 2246 with contextlib.suppress(AttributeError): 2247 return self.__names 2248 self.__names = super(FastLookup, self).namelist() 2249 return self.__names 2250 2251 def _name_set(self): 2252 with contextlib.suppress(AttributeError): 2253 return self.__lookup 2254 self.__lookup = super(FastLookup, self)._name_set() 2255 return self.__lookup 2256 2257 2258class Path: 2259 """ 2260 A pathlib-compatible interface for zip files. 2261 2262 Consider a zip file with this structure:: 2263 2264 . 2265 ├── a.txt 2266 └── b 2267 ├── c.txt 2268 └── d 2269 └── e.txt 2270 2271 >>> data = io.BytesIO() 2272 >>> zf = ZipFile(data, 'w') 2273 >>> zf.writestr('a.txt', 'content of a') 2274 >>> zf.writestr('b/c.txt', 'content of c') 2275 >>> zf.writestr('b/d/e.txt', 'content of e') 2276 >>> zf.filename = 'abcde.zip' 2277 2278 Path accepts the zipfile object itself or a filename 2279 2280 >>> root = Path(zf) 2281 2282 From there, several path operations are available. 

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    # Format template used by __repr__.
    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        # root: a ZipFile (or anything CompleteDirs.make accepts, e.g. a
        # filename); at: archive-relative posix path ("" is the archive root,
        # a trailing "/" marks a directory entry — see is_dir below).
        self.root = FastLookup.make(root)
        self.at = at

    @property
    def open(self):
        # Property returning a callable, so `p.open(...)` forwards to
        # ZipFile.open with this entry's name pre-bound.
        return functools.partial(self.root.open, self.at)

    @property
    def name(self):
        return posixpath.basename(self.at.rstrip("/"))

    def read_text(self, *args, **kwargs):
        # Extra args/kwargs are passed to io.TextIOWrapper (e.g. encoding).
        with self.open() as strm:
            return io.TextIOWrapper(strm, *args, **kwargs).read()

    def read_bytes(self):
        with self.open() as strm:
            return strm.read()

    def _is_child(self, path):
        # True when *path* is an immediate child of this directory.
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        return Path(self.root, at)

    def is_dir(self):
        # The archive root ("") and any name ending in "/" are directories.
        return not self.at or self.at.endswith("/")

    def is_file(self):
        return not self.is_dir()

    def exists(self):
        return self.at in self.root._name_set()

    def iterdir(self):
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self.root.namelist())
        return filter(self._is_child, subs)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, add):
        # NOTE(review): `next` shadows the builtin; harmless in this scope.
        next = posixpath.join(self.at, add)
        return self._next(self.root.resolve_dir(next))

    __truediv__ = joinpath

    @property
    def parent(self):
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)


def main(args=None):
    """Simple command-line interface; *args* defaults to sys.argv[1:]."""
    import argparse

    description = 'A simple command-line interface for zipfile module.'
    parser = argparse.ArgumentParser(description=description)
    # The four actions are mutually exclusive and exactly one is required.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    group.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    group.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    args = parser.parse_args(args)

    if args.test is not None:
        src = args.test
        with ZipFile(src, 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args.list is not None:
        src = args.list
        with ZipFile(src, 'r') as zf:
            zf.printdir()

    elif args.extract is not None:
        src, curdir = args.extract
        with ZipFile(src, 'r') as zf:
            zf.extractall(curdir)

    elif args.create is not None:
        # First positional is the archive name; the rest are sources.
        zip_name = args.create.pop(0)
        files = args.create

        def addToZip(zf, path, zippath):
            # Recursively add a file or directory tree under *zippath*.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                if zippath:
                    zf.write(path, zippath)
                for nm in sorted(os.listdir(path)):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

2437 with ZipFile(zip_name, 'w') as zf: 2438 for path in files: 2439 zippath = os.path.basename(path) 2440 if not zippath: 2441 zippath = os.path.basename(os.path.dirname(path)) 2442 if zippath in ('', os.curdir, os.pardir): 2443 zippath = '' 2444 addToZip(zf, path, zippath) 2445 2446 2447if __name__ == "__main__": 2448 main()