this repo has no description
1#!/usr/bin/env python3
2# Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
3# WARNING: This is a temporary copy of code from the cpython library to
4# facilitate bringup. Please file a task for anything you change!
5# flake8: noqa
6# fmt: off
7"""
8Read and write ZIP files.
9
10XXX references to utf-8 need further investigation.
11"""
12import binascii
13import contextlib
14import functools
15import importlib.util
16import io
17import itertools
18import os
19import posixpath
20import shutil
21import stat
22import struct
23import sys
24import threading
25import time
26
27try:
28 import zlib # We may need its compression method
29 crc32 = zlib.crc32
30except ImportError:
31 zlib = None
32 crc32 = binascii.crc32
33
34try:
35 import bz2 # We may need its compression method
36except ImportError:
37 bz2 = None
38
39try:
40 import lzma # We may need its compression method
41except ImportError:
42 lzma = None
43
# Public API exported by "from zipfile import *".
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
           "Path"]
48
class BadZipFile(Exception):
    """Raised when a file is not a valid ZIP archive or its records are corrupt."""
    pass
51
52
class LargeZipFile(Exception):
    """
    Raised when a zipfile being written requires ZIP64 extensions
    and those extensions are disabled (allowZip64=False).
    """
58
error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names


# Limits beyond which the ZIP64 extensions are required.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "version needed to extract" values written into the headers.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Indices into the unpacked end-of-central-directory tuple.
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

# Signature of the optional "data descriptor" record that follows file data
# when bit 3 of the general-purpose flags is set.
_DD_SIGNATURE = 0x08074b50

# TODO(T65337126): Uncomment next line
# _EXTRA_FIELD_STRUCT = struct.Struct('<HH')
177
178def _strip_extra(extra, xids):
179 # TODO(T65337126): Remove next line
180 _EXTRA_FIELD_STRUCT = struct.Struct('<HH')
181 # Remove Extra Fields with specified IDs.
182 unpack = _EXTRA_FIELD_STRUCT.unpack
183 modified = False
184 buffer = []
185 start = i = 0
186 while i + 4 <= len(extra):
187 xid, xlen = unpack(extra[i : i + 4])
188 j = i + 4 + xlen
189 if xid in xids:
190 if i != start:
191 buffer.append(extra[start : i])
192 start = j
193 modified = True
194 i = j
195 if not modified:
196 return extra
197 return b''.join(buffer)
198
def _check_zipfile(fp):
    """Return True if *fp* contains a valid end-of-central-directory record."""
    try:
        endrec = _EndRecData(fp)
    except OSError:
        return False
    # _EndRecData returns a non-empty list on success, None otherwise.
    return bool(endrec)
206
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    """
    try:
        if hasattr(filename, "read"):
            # Already an open file-like object.
            return _check_zipfile(fp=filename)
        with open(filename, "rb") as fp:
            return _check_zipfile(fp)
    except OSError:
        # Unreadable/nonexistent paths are simply "not a zipfile".
        return False
222
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin   -- seekable input file.
    offset -- negative offset (relative to end of file) of the regular
              end-of-central-directory record.
    endrec -- mutable list of EOCD fields; updated in place and returned.
    Returns endrec unchanged when no valid ZIP64 records are present.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
264
265
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.
    Returns None if no valid record can be located."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        # File too small to hold even an EOCD record.
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
        data[0:4] == stringEndArchive and
        data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec=list(endrec)

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment. Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long. It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
325
326
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create a member record.

        filename -- name the member will have inside the archive.
        date_time -- 6-tuple (year, month, day, hour, min, sec); the ZIP
        format cannot represent years before 1980, so those raise ValueError.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte. Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        # Validate the timestamp before populating any state on the instance.
        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0      # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3      # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file (local) header as a bytes object.

        zip64 -- force (True) or forbid (False) the ZIP64 extra record;
        None selects it automatically based on the recorded sizes.
        Raises LargeZipFile if ZIP64 is needed but forbidden.
        """
        dt = self.date_time
        # Pack the timestamp into the MS-DOS date/time fields (2 s resolution).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits); sets the UTF-8 flag (0x800)
        when the name does not fit in ASCII."""
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Parse self.extra, applying any ZIP64 (id 0x0001) field to this record.

        Raises BadZipFile when a field's declared size is inconsistent with
        the data, or a needed 64-bit value is absent.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                # The ZIP64 field holds up to three 8-byte values, present only
                # for whichever 32-bit header fields overflowed (0xffffffff).
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. File size not found."
                        )
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. Compress size not found."
                        )
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. Header offset not found."
                        )
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).

        When strict_timestamps is False, modification times outside the
        representable 1980-2107 range are clamped instead of failing later.
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        # endswith() is robust against an empty filename (the old
        # self.filename[-1] indexing raised IndexError).
        return self.filename.endswith('/')
561
562
563# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
564# internal keys. We noticed that a direct implementation is faster than
565# relying on binascii.crc32().
566
567_crctable = None
568def _gen_crc(crc):
569 for j in range(8):
570 if crc & 1:
571 crc = (crc >> 1) ^ 0xEDB88320
572 else:
573 crc >>= 1
574 return crc
575
576# ZIP supports a password-based form of encryption. Even though known
577# plaintext attacks have been found against it, it is still useful
578# to be able to get data out of such a file.
579#
580# Usage:
581# zd = _ZipDecrypter(mypwd)
582# plain_bytes = zd(cypher_bytes)
583
def _ZipDecrypter(pwd):
    """Return a closure that decrypts bytes using ZIP's legacy PKWARE cipher.

    pwd -- the password as a bytes object.
    """
    global _crctable
    if _crctable is None:
        _crctable = [_gen_crc(n) for n in range(256)]
    table = _crctable

    # Cipher state: three rolling 32-bit keys seeded with fixed constants.
    key0 = 305419896
    key1 = 591751049
    key2 = 878082192

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ table[(crc ^ ch) & 0xFF]

    def update_keys(c):
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
        key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    # Prime the keys with the password bytes.
    for ch in pwd:
        update_keys(ch)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        emit = plain.append
        for byte in data:
            k = key2 | 2
            byte ^= ((k * (k ^ 1)) >> 8) & 0xFF
            update_keys(byte)
            emit(byte)
        return bytes(plain)

    return decrypter
620
621
class LZMACompressor:
    """Compressor producing the raw-LZMA1 payload used by ZIP method 14.

    The first bytes emitted are a small property header (version and the
    encoded LZMA1 filter properties) that must precede the raw stream.
    """

    def __init__(self):
        self._comp = None

    def _init(self):
        # Build the real compressor lazily and return the property header.
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filters = [lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)]
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=filters)
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        # _init() must run before the first use of self._comp.
        header = self._init() if self._comp is None else b''
        return header + self._comp.compress(data)

    def flush(self):
        header = self._init() if self._comp is None else b''
        return header + self._comp.flush()
643
644
class LZMADecompressor:
    """Decompressor for the raw-LZMA1 payload used by ZIP method 14.

    Buffers input until the property header (4 bytes plus the encoded
    filter properties) has fully arrived, then streams the raw data.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        if self._decomp is None:
            self._unconsumed += data
            # Need the 4-byte header prefix before the property length is known.
            if len(self._unconsumed) <= 4:
                return b''
            psize, = struct.unpack('<H', self._unconsumed[2:4])
            # Wait until the whole property blob (plus at least one payload
            # byte) is available.
            if len(self._unconsumed) <= 4 + psize:
                return b''

            filters = [lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, self._unconsumed[4:4 + psize])]
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=filters)
            data = self._unconsumed[4 + psize:]
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
671
672
# Map of compression-method number -> human-readable name.  Used by
# ZipInfo.__repr__ / ZipExtFile.__repr__ and by _get_decompressor's error
# messages; most of these methods are recognized but not supported.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
692
def _check_compression(compression):
    """Raise if *compression* is unsupported or its backing module is absent.

    RuntimeError -- the method is known but the needed module failed to import.
    NotImplementedError -- the method is not supported by this module.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
    else:
        raise NotImplementedError("That compression method is not supported")
710
711
def _get_compressor(compress_type, compresslevel=None):
    """Return a fresh compressor object for *compress_type*.

    Returns None for ZIP_STORED (and unknown types, matching historic
    behavior); compresslevel is ignored for ZIP_LZMA.
    """
    if compress_type == ZIP_DEFLATED:
        level = (zlib.Z_DEFAULT_COMPRESSION if compresslevel is None
                 else compresslevel)
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()
    return None
726
727
def _get_decompressor(compress_type):
    """Return a fresh decompressor for *compress_type* (None for ZIP_STORED).

    Raises NotImplementedError for recognized-but-unsupported methods.
    """
    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()
    # _check_compression already rejects anything else, but keep a clear
    # message in case that changes.
    descr = compressor_names.get(compress_type)
    if descr:
        raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
    raise NotImplementedError("compression type %d" % (compress_type,))
744
745
746class _SharedFile:
747 def __init__(self, file, pos, close, lock, writing):
748 self._file = file
749 self._pos = pos
750 self._close = close
751 self._lock = lock
752 self._writing = writing
753 self.seekable = file.seekable
754 self.tell = file.tell
755
756 def seek(self, offset, whence=0):
757 with self._lock:
758 if self._writing():
759 raise ValueError("Can't reposition in the ZIP file while "
760 "there is an open writing handle on it. "
761 "Close the writing handle before trying to read.")
762 self._file.seek(offset, whence)
763 self._pos = self._file.tell()
764 return self._pos
765
766 def read(self, n=-1):
767 with self._lock:
768 if self._writing():
769 raise ValueError("Can't read from the ZIP file while there "
770 "is an open writing handle on it. "
771 "Close the writing handle before trying to read.")
772 self._file.seek(self._pos)
773 data = self._file.read(n)
774 self._pos = self._file.tell()
775 return data
776
777 def close(self):
778 if self._file is not None:
779 fileobj = self._file
780 self._file = None
781 self._close(fileobj)
782
783# Provide the tell method for unseekable stream
784class _Tellable:
785 def __init__(self, fp):
786 self.fp = fp
787 self.offset = 0
788
789 def write(self, data):
790 n = self.fp.write(data)
791 self.offset += n
792 return n
793
794 def tell(self):
795 return self.offset
796
797 def flush(self):
798 self.fp.flush()
799
800 def close(self):
801 self.fp.close()
802
803
804class ZipExtFile(io.BufferedIOBase):
805 """File-like object for reading an archive member.
806 Is returned by ZipFile.open().
807 """
808
809 # Max size supported by decompressor.
810 MAX_N = 1 << 31 - 1
811
812 # Read from compressed files in 4k blocks.
813 MIN_READ_SIZE = 4096
814
815 # Chunk size to read during seek
816 MAX_SEEK_READ = 1 << 24
817
818 def __init__(self, fileobj, mode, zipinfo, pwd=None,
819 close_fileobj=False):
820 self._fileobj = fileobj
821 self._pwd = pwd
822 self._close_fileobj = close_fileobj
823
824 self._compress_type = zipinfo.compress_type
825 self._compress_left = zipinfo.compress_size
826 self._left = zipinfo.file_size
827
828 self._decompressor = _get_decompressor(self._compress_type)
829
830 self._eof = False
831 self._readbuffer = b''
832 self._offset = 0
833
834 self.newlines = None
835
836 self.mode = mode
837 self.name = zipinfo.filename
838
839 if hasattr(zipinfo, 'CRC'):
840 self._expected_crc = zipinfo.CRC
841 self._running_crc = crc32(b'')
842 else:
843 self._expected_crc = None
844
845 self._seekable = False
846 try:
847 if fileobj.seekable():
848 self._orig_compress_start = fileobj.tell()
849 self._orig_compress_size = zipinfo.compress_size
850 self._orig_file_size = zipinfo.file_size
851 self._orig_start_crc = self._running_crc
852 self._seekable = True
853 except AttributeError:
854 pass
855
856 self._decrypter = None
857 if pwd:
858 if zipinfo.flag_bits & 0x8:
859 # compare against the file type from extended local headers
860 check_byte = (zipinfo._raw_time >> 8) & 0xff
861 else:
862 # compare against the CRC otherwise
863 check_byte = (zipinfo.CRC >> 24) & 0xff
864 h = self._init_decrypter()
865 if h != check_byte:
866 raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)
867
868
869 def _init_decrypter(self):
870 self._decrypter = _ZipDecrypter(self._pwd)
871 # The first 12 bytes in the cypher stream is an encryption header
872 # used to strengthen the algorithm. The first 11 bytes are
873 # completely random, while the 12th contains the MSB of the CRC,
874 # or the MSB of the file time depending on the header type
875 # and is used to check the correctness of the password.
876 header = self._fileobj.read(12)
877 self._compress_left -= 12
878 return self._decrypter(header)[11]
879
880 def __repr__(self):
881 result = ['<%s.%s' % (self.__class__.__module__,
882 self.__class__.__qualname__)]
883 if not self.closed:
884 result.append(' name=%r mode=%r' % (self.name, self.mode))
885 if self._compress_type != ZIP_STORED:
886 result.append(' compress_type=%s' %
887 compressor_names.get(self._compress_type,
888 self._compress_type))
889 else:
890 result.append(' [closed]')
891 result.append('>')
892 return ''.join(result)
893
894 def readline(self, limit=-1):
895 """Read and return a line from the stream.
896
897 If limit is specified, at most limit bytes will be read.
898 """
899
900 if limit < 0:
901 # Shortcut common case - newline found in buffer.
902 i = self._readbuffer.find(b'\n', self._offset) + 1
903 if i > 0:
904 line = self._readbuffer[self._offset: i]
905 self._offset = i
906 return line
907
908 return io.BufferedIOBase.readline(self, limit)
909
910 def peek(self, n=1):
911 """Returns buffered bytes without advancing the position."""
912 if n > len(self._readbuffer) - self._offset:
913 chunk = self.read(n)
914 if len(chunk) > self._offset:
915 self._readbuffer = chunk + self._readbuffer[self._offset:]
916 self._offset = 0
917 else:
918 self._offset -= len(chunk)
919
920 # Return up to 512 bytes to reduce allocation overhead for tight loops.
921 return self._readbuffer[self._offset: self._offset + 512]
922
923 def readable(self):
924 return True
925
926 def read(self, n=-1):
927 """Read and return up to n bytes.
928 If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
929 """
930 if n is None or n < 0:
931 buf = self._readbuffer[self._offset:]
932 self._readbuffer = b''
933 self._offset = 0
934 while not self._eof:
935 buf += self._read1(self.MAX_N)
936 return buf
937
938 end = n + self._offset
939 if end < len(self._readbuffer):
940 buf = self._readbuffer[self._offset:end]
941 self._offset = end
942 return buf
943
944 n = end - len(self._readbuffer)
945 buf = self._readbuffer[self._offset:]
946 self._readbuffer = b''
947 self._offset = 0
948 while n > 0 and not self._eof:
949 data = self._read1(n)
950 if n < len(data):
951 self._readbuffer = data
952 self._offset = n
953 buf += data[:n]
954 break
955 buf += data
956 n -= len(data)
957 return buf
958
959 def _update_crc(self, newdata):
960 # Update the CRC using the given data.
961 if self._expected_crc is None:
962 # No need to compute the CRC if we don't have a reference value
963 return
964 self._running_crc = crc32(newdata, self._running_crc)
965 # Check the CRC if we're at the end of the file
966 if self._eof and self._running_crc != self._expected_crc:
967 raise BadZipFile("Bad CRC-32 for file %r" % self.name)
968
969 def read1(self, n):
970 """Read up to n bytes with at most one read() system call."""
971
972 if n is None or n < 0:
973 buf = self._readbuffer[self._offset:]
974 self._readbuffer = b''
975 self._offset = 0
976 while not self._eof:
977 data = self._read1(self.MAX_N)
978 if data:
979 buf += data
980 break
981 return buf
982
983 end = n + self._offset
984 if end < len(self._readbuffer):
985 buf = self._readbuffer[self._offset:end]
986 self._offset = end
987 return buf
988
989 n = end - len(self._readbuffer)
990 buf = self._readbuffer[self._offset:]
991 self._readbuffer = b''
992 self._offset = 0
993 if n > 0:
994 while not self._eof:
995 data = self._read1(n)
996 if n < len(data):
997 self._readbuffer = data
998 self._offset = n
999 buf += data[:n]
1000 break
1001 if data:
1002 buf += data
1003 break
1004 return buf
1005
1006 def _read1(self, n):
1007 # Read up to n compressed bytes with at most one read() system call,
1008 # decrypt and decompress them.
1009 if self._eof or n <= 0:
1010 return b''
1011
1012 # Read from file.
1013 if self._compress_type == ZIP_DEFLATED:
1014 ## Handle unconsumed data.
1015 data = self._decompressor.unconsumed_tail
1016 if n > len(data):
1017 data += self._read2(n - len(data))
1018 else:
1019 data = self._read2(n)
1020
1021 if self._compress_type == ZIP_STORED:
1022 self._eof = self._compress_left <= 0
1023 elif self._compress_type == ZIP_DEFLATED:
1024 n = max(n, self.MIN_READ_SIZE)
1025 data = self._decompressor.decompress(data, n)
1026 self._eof = (self._decompressor.eof or
1027 self._compress_left <= 0 and
1028 not self._decompressor.unconsumed_tail)
1029 if self._eof:
1030 data += self._decompressor.flush()
1031 else:
1032 data = self._decompressor.decompress(data)
1033 self._eof = self._decompressor.eof or self._compress_left <= 0
1034
1035 data = data[:self._left]
1036 self._left -= len(data)
1037 if self._left <= 0:
1038 self._eof = True
1039 self._update_crc(data)
1040 return data
1041
1042 def _read2(self, n):
1043 if self._compress_left <= 0:
1044 return b''
1045
1046 n = max(n, self.MIN_READ_SIZE)
1047 n = min(n, self._compress_left)
1048
1049 data = self._fileobj.read(n)
1050 self._compress_left -= len(data)
1051 if not data:
1052 raise EOFError
1053
1054 if self._decrypter is not None:
1055 data = self._decrypter(data)
1056 return data
1057
1058 def close(self):
1059 try:
1060 if self._close_fileobj:
1061 self._fileobj.close()
1062 finally:
1063 super().close()
1064
1065 def seekable(self):
1066 return self._seekable
1067
    def seek(self, offset, whence=0):
        """Seek to *offset* (in uncompressed bytes) relative to *whence*.

        Backward seeks restart decompression from the beginning of the
        member; forward seeks are realized by reading and discarding data.
        Returns the new absolute position.
        """
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        # Clamp the target position into [0, uncompressed size].
        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            # to the start of the member and re-read forward from there.
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        # Consume forward in bounded chunks until the target is reached.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()
1115
1116 def tell(self):
1117 if not self._seekable:
1118 raise io.UnsupportedOperation("underlying stream is not seekable")
1119 filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
1120 return filepos
1121
1122
class _ZipWriteFile(io.BufferedIOBase):
    """Writable stream returned by ZipFile.open(..., mode='w').

    Compresses data as it is written, maintains CRC/size bookkeeping, and
    on close() either rewrites the member's local header with the final
    sizes or (for non-seekable archives, flag bit 3) appends a data
    descriptor after the file data.
    """

    def __init__(self, zf, zinfo, zip64):
        self._zinfo = zinfo        # ZipInfo record being written
        self._zip64 = zip64        # whether ZIP64 header fields are in use
        self._zipfile = zf         # owning ZipFile
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0        # uncompressed bytes written so far
        self._compress_size = 0    # compressed bytes written so far
        self._crc = 0              # running CRC-32 of uncompressed data

    @property
    def _fileobj(self):
        # Always go through the owning ZipFile so a replaced/wrapped fp
        # (e.g. _Tellable) is picked up.
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Write *data* (any buffer-protocol object) to the archive member.

        Returns the number of uncompressed bytes consumed.
        """
        if self.closed:
            raise ValueError('I/O operation on closed file.')

        # Accept any object supporting the buffer protocol.  len() counts
        # elements, not bytes, so it under-counts for memoryviews whose
        # itemsize != 1; use memoryview.nbytes for the byte count instead.
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes

        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        """Finish the member: flush the compressor and fix up header data."""
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data (data
                # descriptor) because the stream cannot seek backwards.
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'File size unexpectedly exceeded ZIP64 limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'Compressed size unexpectedly exceeded ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            self._zipfile._writing = False
1198
1199
1200
class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
                compresslevel=None)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read 'r', write 'w', exclusive create 'x',
          or append 'a'.
    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
                 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.
    compresslevel: None (default for the given compression type) or an integer
                   specifying the level to pass to the compressor.
                   When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
                   When using ZIP_DEFLATED integers 0 through 9 are accepted.
                   When using ZIP_BZIP2 integers 1 through 9 are accepted.

    """

    fp = None  # Set here since __del__ checks it
    # Lazily-built str.translate table used by _sanitize_windows_name().
    _windows_illegal_name_trans_table = None

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None, *, strict_timestamps=True):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'."""
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''
        self._strict_timestamps = strict_timestamps

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            # On OSError, fall back along the modeDict chain (e.g. mode 'a'
            # tries 'r+b', then 'w+b' when the file does not yet exist).
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    if filemode in modeDict:
                        filemode = modeDict[filemode]
                        continue
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        # Number of open handles sharing self.fp; see _fpclose().
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                self.start_dir = self.fp.tell()
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            # On any failure, release the file before re-raising so the
            # half-constructed object does not leak an open handle.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if self.fp is not None:
            if self._filePassed:
                result.append(' file=%r' % self.fp)
            elif self.filename is not None:
                result.append(' filename=%r' % self.filename)
            result.append(' mode=%r' % self.mode)
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file."""
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset", offset_cd, inferred, concat)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        # Read the whole central directory into memory and parse it from
        # a BytesIO to avoid many small reads on the real file object.
        data = fp.read(size_cd)
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            flags = centdir[5]
            if flags & 0x800:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
                x.flag_bits, x.compress_type, t, d,
                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            # Local header offsets are relative to the start of the zip
            # data, so shift them by any leading (concatenated) bytes.
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)


    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self, file=None):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
              file=file)
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
                  file=file)

    def testzip(self):
        """Read all the files and check the CRC."""
        chunk_size = 2 ** 20
        for zinfo in self.filelist:
            try:
                # Read by chunks, to avoid an OverflowError or a
                # MemoryError with very large embedded files.
                with self.open(zinfo.filename, "r") as f:
                    while f.read(chunk_size):     # Check CRC-32
                        pass
            except BadZipFile:
                return zinfo.filename
        # Implicitly returns None when every member passes its CRC check.

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'."""
        info = self.NameToInfo.get(name)
        if info is None:
            raise KeyError(
                'There is no item named %r in the archive' % name)

        return info

    def setpassword(self, pwd):
        """Set default password for encrypted files."""
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd:
            self.pwd = pwd
        else:
            self.pwd = None

    @property
    def comment(self):
        """The comment text associated with the ZIP file."""
        return self._comment

    @comment.setter
    def comment(self, comment):
        if not isinstance(comment, bytes):
            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
        # check for valid comment length
        if len(comment) > ZIP_MAX_COMMENT:
            import warnings
            warnings.warn('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT, stacklevel=2)
            comment = comment[:ZIP_MAX_COMMENT]
        self._comment = comment
        self._didModify = True

    def read(self, name, pwd=None):
        """Return file bytes for name."""
        with self.open(name, "r", pwd) as fp:
            return fp.read()

    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
        """Return file-like object for 'name'.

        name is a string for the file name within the ZIP file, or a ZipInfo
        object.

        mode should be 'r' to read a file already in the ZIP file, or 'w' to
        write to a file newly added to the archive.

        pwd is the password to decrypt files (only used for reading).

        When writing, if the file size is not known in advance but may exceed
        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
        files.  If the size is known in advance, it is best to pass a ZipInfo
        instance for name, with zinfo.file_size set.
        """
        if mode not in {"r", "w"}:
            raise ValueError('open() requires mode "r" or "w"')
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd and (mode == "w"):
            raise ValueError("pwd is only supported for reading files")
        if not self.fp:
            raise ValueError(
                "Attempt to use ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        elif mode == 'w':
            zinfo = ZipInfo(name)
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        if mode == 'w':
            return self._open_to_write(zinfo, force_zip64=force_zip64)

        if self._writing:
            raise ValueError("Can't read from the ZIP file while there "
                    "is an open writing handle on it. "
                    "Close the writing handle before trying to read.")

        # Open for reading:
        self._fileRefCnt += 1
        zef_file = _SharedFile(self.fp, zinfo.header_offset,
                               self._fpclose, self._lock, lambda: self._writing)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x20:
                # Zip 2.7: compressed patched data
                raise NotImplementedError("compressed patched data (flag bit 5)")

            if zinfo.flag_bits & 0x40:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            # The local header's name must agree with the central directory;
            # a mismatch indicates a corrupt (or hostile) archive.
            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            if is_encrypted:
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %r is encrypted, password "
                                       "required for extraction" % name)
            else:
                pwd = None

            return ZipExtFile(zef_file, mode, zinfo, pwd, True)
        except:
            zef_file.close()
            raise

    def _open_to_write(self, zinfo, force_zip64=False):
        """Return a _ZipWriteFile that streams *zinfo*'s data into the archive."""
        if force_zip64 and not self._allowZip64:
            raise ValueError(
                "force_zip64 is True, but allowZip64 was False when opening "
                "the ZIP file."
            )
        if self._writing:
            raise ValueError("Can't write to the ZIP file while there is "
                             "another write handle open on it. "
                             "Close the first handle before opening another.")

        # Sizes and CRC are overwritten with correct data after processing the file
        if not hasattr(zinfo, 'file_size'):
            zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0

        zinfo.flag_bits = 0x00
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02
        if not self._seekable:
            # Cannot patch the header afterwards; use a data descriptor.
            zinfo.flag_bits |= 0x08

        if not zinfo.external_attr:
            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()

        self._writecheck(zinfo)
        self._didModify = True

        self.fp.write(zinfo.FileHeader(zip64))

        self._writing = True
        return _ZipWriteFile(self, zinfo, zip64)

    def extract(self, member, path=None, pwd=None):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a ZipInfo object. You can
           specify a different directory using `path'.
        """
        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)

        return self._extract_member(member, path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all members from the archive to the current working
           directory. `path' specifies a different directory to extract to.
           `members' is optional and must be a subset of the list returned
           by namelist().
        """
        if members is None:
            members = self.namelist()

        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)

        for zipinfo in members:
            self._extract_member(zipinfo, path, pwd)

    @classmethod
    def _sanitize_windows_name(cls, arcname, pathsep):
        """Replace bad characters and remove trailing dots from parts."""
        table = cls._windows_illegal_name_trans_table
        if not table:
            illegal = ':<>|"?*'
            table = str.maketrans(illegal, '_' * len(illegal))
            cls._windows_illegal_name_trans_table = table
        arcname = arcname.translate(table)
        # remove trailing dots
        arcname = (x.rstrip('.') for x in arcname.split(pathsep))
        # rejoin, removing empty parts.
        arcname = pathsep.join(x for x in arcname if x)
        return arcname

    def _extract_member(self, member, targetpath, pwd):
        """Extract the ZipInfo object 'member' to a physical
           file on the path targetpath.
        """
        if not isinstance(member, ZipInfo):
            member = self.getinfo(member)

        # build the destination pathname, replacing
        # forward slashes to platform specific separators.
        arcname = member.filename.replace('/', os.path.sep)

        if os.path.altsep:
            arcname = arcname.replace(os.path.altsep, os.path.sep)
        # interpret absolute pathname as relative, remove drive letter or
        # UNC path, redundant separators, "." and ".." components.
        arcname = os.path.splitdrive(arcname)[1]
        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                                   if x not in invalid_path_parts)
        if os.path.sep == '\\':
            # filter illegal characters on Windows
            arcname = self._sanitize_windows_name(arcname, os.path.sep)

        targetpath = os.path.join(targetpath, arcname)
        targetpath = os.path.normpath(targetpath)

        # Create all upper directories if necessary.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            os.makedirs(upperdirs)

        if member.is_dir():
            if not os.path.isdir(targetpath):
                os.mkdir(targetpath)
            return targetpath

        with self.open(member, pwd=pwd) as source, \
             open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)

        return targetpath

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive."""
        if zinfo.filename in self.NameToInfo:
            import warnings
            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
        if self.mode not in ('w', 'x', 'a'):
            raise ValueError("write() requires mode 'w', 'x', or 'a'")
        if not self.fp:
            raise ValueError(
                "Attempt to write ZIP archive that was already closed")
        _check_compression(zinfo.compress_type)
        if not self._allowZip64:
            requires_zip64 = None
            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
                requires_zip64 = "Files count"
            elif zinfo.file_size > ZIP64_LIMIT:
                requires_zip64 = "Filesize"
            elif zinfo.header_offset > ZIP64_LIMIT:
                requires_zip64 = "Zipfile size"
            if requires_zip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")

    def write(self, filename, arcname=None,
              compress_type=None, compresslevel=None):
        """Put the bytes from filename into the archive under the name
        arcname."""
        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists"
            )

        zinfo = ZipInfo.from_file(filename, arcname,
                                  strict_timestamps=self._strict_timestamps)

        if zinfo.is_dir():
            zinfo.compress_size = 0
            zinfo.CRC = 0
        else:
            if compress_type is not None:
                zinfo.compress_type = compress_type
            else:
                zinfo.compress_type = self.compression

            if compresslevel is not None:
                zinfo._compresslevel = compresslevel
            else:
                zinfo._compresslevel = self.compresslevel

        if zinfo.is_dir():
            # Directory entries carry no data: write the header directly
            # instead of going through an open('w') handle.
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                zinfo.header_offset = self.fp.tell()  # Start of header bytes
                if zinfo.compress_type == ZIP_LZMA:
                # Compressed data includes an end-of-stream (EOS) marker
                    zinfo.flag_bits |= 0x02

                self._writecheck(zinfo)
                self._didModify = True

                self.filelist.append(zinfo)
                self.NameToInfo[zinfo.filename] = zinfo
                self.fp.write(zinfo.FileHeader(False))
                self.start_dir = self.fp.tell()
        else:
            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
                shutil.copyfileobj(src, dest, 1024*8)

    def writestr(self, zinfo_or_arcname, data,
                 compress_type=None, compresslevel=None):
        """Write a file into the archive.  The contents is 'data', which
        may be either a 'str' or a 'bytes' instance; if it is a 'str',
        it is encoded as UTF-8 first.
        'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive."""
        if isinstance(data, str):
            data = data.encode("utf-8")
        if not isinstance(zinfo_or_arcname, ZipInfo):
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
            if zinfo.filename[-1] == '/':
                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
                zinfo.external_attr |= 0x10           # MS-DOS directory flag
            else:
                zinfo.external_attr = 0o600 << 16     # ?rw-------
        else:
            zinfo = zinfo_or_arcname

        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists."
            )

        if compress_type is not None:
            zinfo.compress_type = compress_type

        if compresslevel is not None:
            zinfo._compresslevel = compresslevel

        zinfo.file_size = len(data)            # Uncompressed size
        with self._lock:
            with self.open(zinfo, mode='w') as dest:
                dest.write(data)

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode 'w', 'x' and 'a' write the ending
        records."""
        if self.fp is None:
            return

        if self._writing:
            raise ValueError("Can't close the ZIP file while there is "
                             "an open writing handle on it. "
                             "Close the writing handle before closing the zip.")

        try:
            if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
                with self._lock:
                    if self._seekable:
                        self.fp.seek(self.start_dir)
                    self._write_end_record()
        finally:
            fp = self.fp
            self.fp = None
            self._fpclose(fp)

    def _write_end_record(self):
        """Write the central directory and end-of-archive record(s)."""
        for zinfo in self.filelist:         # write central directory
            dt = zinfo.date_time
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            extra = []
            if zinfo.file_size > ZIP64_LIMIT \
               or zinfo.compress_size > ZIP64_LIMIT:
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                # Oversized fields are stored in the ZIP64 extra record and
                # flagged here with the 0xffffffff sentinel.
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffff
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            min_version = 0
            if extra:
                # Append a ZIP64 field to the extra's
                extra_data = _strip_extra(extra_data, (1,))
                extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                min_version = ZIP64_VERSION

            if zinfo.compress_type == ZIP_BZIP2:
                min_version = max(BZIP2_VERSION, min_version)
            elif zinfo.compress_type == ZIP_LZMA:
                min_version = max(LZMA_VERSION, min_version)

            extract_version = max(min_version, zinfo.extract_version)
            create_version = max(min_version, zinfo.create_version)
            try:
                filename, flag_bits = zinfo._encodeFilenameFlags()
                centdir = struct.pack(structCentralDir,
                                      stringCentralDir, create_version,
                                      zinfo.create_system, extract_version, zinfo.reserved,
                                      flag_bits, zinfo.compress_type, dostime, dosdate,
                                      zinfo.CRC, compress_size, file_size,
                                      len(filename), len(extra_data), len(zinfo.comment),
                                      0, zinfo.internal_attr, zinfo.external_attr,
                                      header_offset)
            except DeprecationWarning:
                print((structCentralDir, stringCentralDir, create_version,
                       zinfo.create_system, extract_version, zinfo.reserved,
                       zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                       zinfo.CRC, compress_size, file_size,
                       len(zinfo.filename), len(extra_data), len(zinfo.comment),
                       0, zinfo.internal_attr, zinfo.external_attr,
                       header_offset), file=sys.stderr)
                raise
            self.fp.write(centdir)
            self.fp.write(filename)
            self.fp.write(extra_data)
            self.fp.write(zinfo.comment)

        pos2 = self.fp.tell()
        # Write end-of-zip-archive record
        centDirCount = len(self.filelist)
        centDirSize = pos2 - self.start_dir
        centDirOffset = self.start_dir
        requires_zip64 = None
        if centDirCount > ZIP_FILECOUNT_LIMIT:
            requires_zip64 = "Files count"
        elif centDirOffset > ZIP64_LIMIT:
            requires_zip64 = "Central directory offset"
        elif centDirSize > ZIP64_LIMIT:
            requires_zip64 = "Central directory size"
        if requires_zip64:
            # Need to write the ZIP64 end-of-archive records
            if not self._allowZip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")
            zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
            self.fp.write(zip64endrec)

            zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
            self.fp.write(zip64locrec)
            # The classic end record still follows, with its fields capped
            # at their maximum values.
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)

        endrec = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, centDirCount, centDirCount,
                             centDirSize, centDirOffset, len(self._comment))
        self.fp.write(endrec)
        self.fp.write(self._comment)
        self.fp.flush()

    def _fpclose(self, fp):
        """Drop one reference to the shared file; close it when unused
        (and only if this ZipFile opened it itself)."""
        assert self._fileRefCnt > 0
        self._fileRefCnt -= 1
        if not self._fileRefCnt and not self._filePassed:
            fp.close()
1961
1962
1963class PyZipFile(ZipFile):
1964 """Class to create ZIP archives with Python library files and packages."""
1965
    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open a PyZipFile; same arguments as ZipFile plus *optimize*."""
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        # Bytecode optimization level for modules added by writepy();
        # presumably -1 means "use the interpreter's current level" as in
        # py_compile — the consumer of _optimize is outside this view, confirm.
        self._optimize = optimize
1971
1972 def writepy(self, pathname, basename="", filterfunc=None):
1973 """Add all files from "pathname" to the ZIP archive.
1974
1975 If pathname is a package directory, search the directory and
1976 all package subdirectories recursively for all *.py and enter
1977 the modules into the archive. If pathname is a plain
1978 directory, listdir *.py and enter all modules. Else, pathname
1979 must be a Python *.py file and the module will be put into the
1980 archive. Added modules are always module.pyc.
1981 This method will compile the module.py into module.pyc if
1982 necessary.
1983 If filterfunc(pathname) is given, it is called with every argument.
1984 When it is False, the file or directory is skipped.
1985 """
1986 pathname = os.fspath(pathname)
1987 if filterfunc and not filterfunc(pathname):
1988 if self.debug:
1989 label = 'path' if os.path.isdir(pathname) else 'file'
1990 print('%s %r skipped by filterfunc' % (label, pathname))
1991 return
1992 dir, name = os.path.split(pathname)
1993 if os.path.isdir(pathname):
1994 initname = os.path.join(pathname, "__init__.py")
1995 if os.path.isfile(initname):
1996 # This is a package directory, add it
1997 if basename:
1998 basename = "%s/%s" % (basename, name)
1999 else:
2000 basename = name
2001 if self.debug:
2002 print("Adding package in", pathname, "as", basename)
2003 fname, arcname = self._get_codename(initname[0:-3], basename)
2004 if self.debug:
2005 print("Adding", arcname)
2006 self.write(fname, arcname)
2007 dirlist = sorted(os.listdir(pathname))
2008 dirlist.remove("__init__.py")
2009 # Add all *.py files and package subdirectories
2010 for filename in dirlist:
2011 path = os.path.join(pathname, filename)
2012 root, ext = os.path.splitext(filename)
2013 if os.path.isdir(path):
2014 if os.path.isfile(os.path.join(path, "__init__.py")):
2015 # This is a package directory, add it
2016 self.writepy(path, basename,
2017 filterfunc=filterfunc) # Recursive call
2018 elif ext == ".py":
2019 if filterfunc and not filterfunc(path):
2020 if self.debug:
2021 print('file %r skipped by filterfunc' % path)
2022 continue
2023 fname, arcname = self._get_codename(path[0:-3],
2024 basename)
2025 if self.debug:
2026 print("Adding", arcname)
2027 self.write(fname, arcname)
2028 else:
2029 # This is NOT a package directory, add its files at top level
2030 if self.debug:
2031 print("Adding files from directory", pathname)
2032 for filename in sorted(os.listdir(pathname)):
2033 path = os.path.join(pathname, filename)
2034 root, ext = os.path.splitext(filename)
2035 if ext == ".py":
2036 if filterfunc and not filterfunc(path):
2037 if self.debug:
2038 print('file %r skipped by filterfunc' % path)
2039 continue
2040 fname, arcname = self._get_codename(path[0:-3],
2041 basename)
2042 if self.debug:
2043 print("Adding", arcname)
2044 self.write(fname, arcname)
2045 else:
2046 if pathname[-3:] != ".py":
2047 raise RuntimeError(
2048 'Files added with writepy() must end with ".py"')
2049 fname, arcname = self._get_codename(pathname[0:-3], basename)
2050 if self.debug:
2051 print("Adding file", arcname)
2052 self.write(fname, arcname)
2053
2054 def _get_codename(self, pathname, basename):
2055 """Return (filename, archivename) for the path.
2056
2057 Given a module name path, return the correct file path and
2058 archive name, compiling if necessary. For example, given
2059 /python/lib/string, return (/python/lib/string.pyc, string).
2060 """
2061 def _compile(file, optimize=-1):
2062 import py_compile
2063 if self.debug:
2064 print("Compiling", file)
2065 try:
2066 py_compile.compile(file, doraise=True, optimize=optimize)
2067 except py_compile.PyCompileError as err:
2068 print(err.msg)
2069 return False
2070 return True
2071
2072 file_py = pathname + ".py"
2073 file_pyc = pathname + ".pyc"
2074 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2075 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2076 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
2077 if self._optimize == -1:
2078 # legacy mode: use whatever file is present
2079 if (os.path.isfile(file_pyc) and
2080 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2081 # Use .pyc file.
2082 arcname = fname = file_pyc
2083 elif (os.path.isfile(pycache_opt0) and
2084 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
2085 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2086 # file name in the archive.
2087 fname = pycache_opt0
2088 arcname = file_pyc
2089 elif (os.path.isfile(pycache_opt1) and
2090 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2091 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2092 # file name in the archive.
2093 fname = pycache_opt1
2094 arcname = file_pyc
2095 elif (os.path.isfile(pycache_opt2) and
2096 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2097 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2098 # file name in the archive.
2099 fname = pycache_opt2
2100 arcname = file_pyc
2101 else:
2102 # Compile py into PEP 3147 pyc file.
2103 if _compile(file_py):
2104 if sys.flags.optimize == 0:
2105 fname = pycache_opt0
2106 elif sys.flags.optimize == 1:
2107 fname = pycache_opt1
2108 else:
2109 fname = pycache_opt2
2110 arcname = file_pyc
2111 else:
2112 fname = arcname = file_py
2113 else:
2114 # new mode: use given optimization level
2115 if self._optimize == 0:
2116 fname = pycache_opt0
2117 arcname = file_pyc
2118 else:
2119 arcname = file_pyc
2120 if self._optimize == 1:
2121 fname = pycache_opt1
2122 elif self._optimize == 2:
2123 fname = pycache_opt2
2124 else:
2125 msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2126 raise ValueError(msg)
2127 if not (os.path.isfile(fname) and
2128 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2129 if not _compile(file_py, optimize=self._optimize):
2130 fname = arcname = file_py
2131 archivename = os.path.split(arcname)[1]
2132 if basename:
2133 archivename = "%s/%s" % (basename, archivename)
2134 return (fname, archivename)
2135
2136
2137def _parents(path):
2138 """
2139 Given a path with elements separated by
2140 posixpath.sep, generate all parents of that path.
2141
2142 >>> list(_parents('b/d'))
2143 ['b']
2144 >>> list(_parents('/b/d/'))
2145 ['/b']
2146 >>> list(_parents('b/d/f/'))
2147 ['b/d', 'b']
2148 >>> list(_parents('b'))
2149 []
2150 >>> list(_parents(''))
2151 []
2152 """
2153 return itertools.islice(_ancestry(path), 1, None)
2154
2155
2156def _ancestry(path):
2157 """
2158 Given a path with elements separated by
2159 posixpath.sep, generate all elements of that path
2160
2161 >>> list(_ancestry('b/d'))
2162 ['b/d', 'b']
2163 >>> list(_ancestry('/b/d/'))
2164 ['/b/d', '/b']
2165 >>> list(_ancestry('b/d/f/'))
2166 ['b/d/f', 'b/d', 'b']
2167 >>> list(_ancestry('b'))
2168 ['b']
2169 >>> list(_ancestry(''))
2170 []
2171 """
2172 path = path.rstrip(posixpath.sep)
2173 while path and path != posixpath.sep:
2174 yield path
2175 path, tail = posixpath.split(path)
2176
2177
# dict preserves insertion order (guaranteed since Python 3.7), so
# dict.fromkeys(iterable) yields each item exactly once, in first-seen
# order, when iterated.
_dedupe = dict.fromkeys
"""Deduplicate an iterable in original order"""
2180
2181
2182def _difference(minuend, subtrahend):
2183 """
2184 Return items in minuend not in subtrahend, retaining order
2185 with O(1) lookup.
2186 """
2187 return itertools.filterfalse(set(subtrahend).__contains__, minuend)
2188
2189
class CompleteDirs(ZipFile):
    """
    A ZipFile subclass that ensures that implied directories
    are always included in the namelist.
    """

    @staticmethod
    def _implied_dirs(names):
        # Collect every ancestor of every member, render each as a
        # directory entry (trailing slash), keep only those not already
        # present, and deduplicate in first-seen order.
        ancestors = itertools.chain.from_iterable(
            _parents(name) for name in names)
        dir_names = (ancestor + posixpath.sep for ancestor in ancestors)
        return _dedupe(_difference(dir_names, names))

    def namelist(self):
        # Explicit entries first, then any directories they imply.
        explicit = super().namelist()
        return explicit + list(self._implied_dirs(explicit))

    def _name_set(self):
        return set(self.namelist())

    def resolve_dir(self, name):
        """
        If the name represents a directory, return that name
        as a directory (with the trailing slash).
        """
        known = self._name_set()
        as_dir = name + '/'
        if name not in known and as_dir in known:
            return as_dir
        return name

    @classmethod
    def make(cls, source):
        """
        Given a source (filename or zipfile), return an
        appropriate CompleteDirs subclass.
        """
        if isinstance(source, CompleteDirs):
            return source

        if not isinstance(source, ZipFile):
            return cls(source)

        # Only allow for FastLookup when supplied zipfile is read-only
        if 'r' not in source.mode:
            cls = CompleteDirs

        # Re-dress the existing ZipFile as cls without re-reading it.
        res = cls.__new__(cls)
        vars(res).update(vars(source))
        return res
2238
2239
class FastLookup(CompleteDirs):
    """
    ZipFile subclass to ensure implicit
    dirs exist and are resolved rapidly.
    """

    def namelist(self):
        # Compute the name list once and cache it on the instance.
        try:
            return self.__names
        except AttributeError:
            self.__names = super().namelist()
            return self.__names

    def _name_set(self):
        # Compute the name set once and cache it on the instance.
        try:
            return self.__lookup
        except AttributeError:
            self.__lookup = super()._name_set()
            return self.__lookup
2256
2257
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    # repr template (name-mangled to _Path__repr).
    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        # root may be a filename, an open ZipFile, or an existing
        # CompleteDirs instance; FastLookup.make normalizes it.
        self.root = FastLookup.make(root)
        self.at = at

    @property
    def open(self):
        """A callable that opens this member within the archive."""
        return functools.partial(self.root.open, self.at)

    @property
    def name(self):
        """Final path component, without any trailing slash."""
        trimmed = self.at.rstrip("/")
        return posixpath.basename(trimmed)

    def read_text(self, *args, **kwargs):
        """Read the member as text; extra args go to TextIOWrapper."""
        with self.open() as strm:
            wrapper = io.TextIOWrapper(strm, *args, **kwargs)
            return wrapper.read()

    def read_bytes(self):
        """Read the member's raw bytes."""
        with self.open() as strm:
            return strm.read()

    def _is_child(self, path):
        # A path is our child when its parent directory equals us.
        own = self.at.rstrip("/")
        return posixpath.dirname(path.at.rstrip("/")) == own

    def _next(self, at):
        return Path(self.root, at)

    def is_dir(self):
        # The archive root ("") and any name ending in "/" are dirs.
        return not self.at or self.at.endswith("/")

    def is_file(self):
        return not self.is_dir()

    def exists(self):
        return self.at in self.root._name_set()

    def iterdir(self):
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        candidates = (self._next(name) for name in self.root.namelist())
        return (child for child in candidates if self._is_child(child))

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, add):
        combined = posixpath.join(self.at, add)
        return self._next(self.root.resolve_dir(combined))

    __truediv__ = joinpath

    @property
    def parent(self):
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)
2384
2385
def main(args=None):
    """Command-line entry point: list, extract, create or test a zip archive."""
    import argparse

    description = 'A simple command-line interface for zipfile module.'
    parser = argparse.ArgumentParser(description=description)
    # Exactly one action must be chosen.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    group.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    group.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    args = parser.parse_args(args)

    if args.test is not None:
        with ZipFile(args.test, 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args.list is not None:
        with ZipFile(args.list, 'r') as zf:
            zf.printdir()

    elif args.extract is not None:
        archive, destination = args.extract
        with ZipFile(archive, 'r') as zf:
            zf.extractall(destination)

    elif args.create is not None:
        zip_name, *files = args.create

        def addToZip(zf, path, zippath):
            # Files are stored deflated; directories recurse, writing an
            # entry for every non-root directory. Other node types
            # (sockets, fifos, ...) are silently skipped.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                if zippath:
                    zf.write(path, zippath)
                for nm in sorted(os.listdir(path)):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(zip_name, 'w') as zf:
            for path in files:
                # Derive the archive name from the basename, tolerating a
                # trailing separator; '.' and '..' map to the archive root.
                zippath = os.path.basename(path)
                if not zippath:
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                addToZip(zf, path, zippath)
2445
2446
if __name__ == "__main__":  # Run the CLI when executed directly as a script.
    main()