this repo has no description
at trunk 2194 lines 70 kB view raw
1#!/usr/bin/env python3 2# Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com) 3"""The io module provides the Python interfaces to stream handling. The 4builtin open function is defined in this module. 5 6At the top of the I/O hierarchy is the abstract base class IOBase. It 7defines the basic interface to a stream. Note, however, that there is no 8separation between reading and writing to streams; implementations are 9allowed to raise an IOError if they do not support a given operation. 10 11Extending IOBase is RawIOBase which deals simply with the reading and 12writing of raw bytes to a stream. FileIO subclasses RawIOBase to provide 13an interface to OS files. 14 15BufferedIOBase deals with buffering on a raw byte stream (RawIOBase). Its 16subclasses, BufferedWriter, BufferedReader, and BufferedRWPair buffer 17streams that are readable, writable, and both respectively. 18BufferedRandom provides a buffered interface to random access 19streams. BytesIO is a simple stream of in-memory bytes. 20 21Another IOBase subclass, TextIOBase, deals with the encoding and decoding 22of streams into text. TextIOWrapper, which extends it, is a buffered text 23interface to a buffered raw stream (`BufferedIOBase`). Finally, StringIO 24is an in-memory stream for text. 25 26Argument names are not part of the specification, and only the arguments 27of open() are intended to be used as keyword arguments.""" 28 29import builtins # noqa: F401 30from _codecs import ( 31 getincrementaldecoder as _codecs_getincrementaldecoder, 32 getincrementalencoder as _codecs_getincrementalencoder, 33) 34from _thread import LockType as _thread_Lock 35from builtins import BlockingIOError, _index, _non_heaptype, _obj_as_int, _type_name 36from errno import EAGAIN as errno_EAGAIN, EISDIR as errno_EISDIR 37 38from _builtins import ( 39 _address, 40 _builtin, 41 _bytearray_len, 42 _bytes_check, 43 _byteslike_check, 44 _float_check, 45 _int_check, 46 _memoryview_check, 47 _object_type_getattr, 48 _object_type_hasattr, 49 _os_write, 50 _str_check, 51 _str_guard, 52 _str_len, 53 _type, 54 _Unbound, 55 _unimplemented, 56 _warn, 57) 58from _os import ( 59 close as _os_close, 60 fstat_size as _os_fstat_size, 61 ftruncate as _os_ftruncate, 62 isatty as _os_isatty, 63 isdir as _os_isdir, 64 linesep as _os_linesep, 65 lseek as _os_lseek, 66 open as _os_open, 67 parse_mode as _os_parse_mode, 68 read as _os_read, 69 set_noinheritable as _os_set_noinheritable, 70) 71 72 73DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes 74 75 76def _BytesIO_closed_guard(obj): 77 _builtin() 78 79 80def _BytesIO_guard(obj): 81 _builtin() 82 83 84def _BytesIO_seek(self, pos, whence): 85 _builtin() 86 87 88def _BytesIO_truncate(self, pos): 89 _builtin() 90 91 92def _BytesIO_seek(self, offset, whence): 93 _builtin() 94 95 96def _StringIO_closed_guard(obj): 97 _builtin() 98 99 100def _StringIO_seek(self, offset, whence): 101 _builtin() 102 103 104def _TextIOWrapper_attached_guard(obj): 105 _builtin() 106 107 108def _TextIOWrapper_attached_closed_guard(obj): 109 _builtin() 110 111 112def _TextIOWrapper_attached_closed_seekable_guard(obj): 113 _builtin() 114 115 116def _TextIOWrapper_write_UTF8(self, text): 117 _builtin() 118 119 120def _buffered_reader_clear_buffer(self): 121 _builtin() 122 123 124def _buffered_reader_init(self, buffer_size): 125 _builtin() 126 127 128def _buffered_reader_peek(self, size=0): 129 _builtin() 130 131 132def _buffered_reader_read(self, size=None): 133 _builtin() 134 135 136def _buffered_reader_readline(self, size=None): 137 _builtin() 138 139 140def _detached_guard(self): 141 if self.raw is None: 142 raise ValueError("raw stream has been detached") 143 144 145def _whence_guard(whence): 146 if whence == 0 or whence == 1 or whence == 2: 147 return 148 raise ValueError("invalid whence value") 149 150 151class IncrementalNewlineDecoder(bootstrap=True): 152 def __init__(self, decoder, translate, errors="strict"): 153 if not _int_check(translate): 154 raise TypeError( 155 f"an integer is required (got type {_type(translate).__name__})" 156 ) 157 self._errors = errors 158 self._translate = translate 159 self._decoder = decoder 160 self._seennl = 0 161 self._pendingcr = False 162 163 def decode(self, input, final=False): 164 if not _int_check(final): 165 raise TypeError( 166 f"an integer is required (got type {_type(final).__name__})" 167 ) 168 # decode input (with the eventual \r from a previous pass) 169 if self._decoder is None: 170 output = input 171 else: 172 output = self._decoder.decode(input, final=bool(final)) 173 if self._pendingcr and (output or final): 174 output = "\r" + output 175 self._pendingcr = False 176 177 # retain last \r even when not translating data: 178 # then readline() is sure to get \r\n in one pass 179 if output.endswith("\r") and not final: 180 output = output[:-1] 181 self._pendingcr = True 182 183 # Record which newlines are read 184 crlf = output.count("\r\n") 185 cr = output.count("\r") - crlf 186 lf = output.count("\n") - crlf 187 self._seennl |= (lf and self._LF) | (cr and self._CR) | (crlf and self._CRLF) 188 189 if self._translate: 190 if crlf: 191 output = output.replace("\r\n", "\n") 192 if cr: 193 output = output.replace("\r", "\n") 194 195 return output 196 197 def getstate(self): 198 if self._decoder is None: 199 buf = b"" 200 flag = 0 201 else: 202 buf, flag = self._decoder.getstate() 203 flag <<= 1 204 if self._pendingcr: 205 flag |= 1 206 return buf, flag 207 208 def setstate(self, state): 209 buf, flag = state 210 self._pendingcr = bool(flag & 1) 211 if self._decoder is not None: 212 self._decoder.setstate((buf, flag >> 1)) 213 214 def reset(self): 215 self._seennl = 0 216 self._pendingcr = False 217 if self._decoder is not None: 218 self._decoder.reset() 219 220 _LF = 1 221 _CR = 2 222 _CRLF = 4 223 224 @property 225 def newlines(self): 226 return ( 227 None, 228 "\n", 229 "\r", 230 ("\r", "\n"), 231 "\r\n", 232 ("\n", "\r\n"), 233 ("\r", "\r\n"), 234 ("\r", "\n", "\r\n"), 235 )[self._seennl] 236 237 238class UnsupportedOperation(OSError, ValueError): 239 pass 240 241 242class _IOBase(bootstrap=True): 243 """The abstract base class for all I/O classes, acting on streams of 244 bytes. There is no public constructor. 245 246 This class provides default method implementations that derived classes can 247 override selectively; the default implementations represent a file that 248 cannot be read, written or seeked. 249 250 The basic type used for binary data read from or written to a file is 251 bytes. Other bytes-like objects are accepted as method arguments too. In 252 some cases (such as readinto), a writable object is required. 253 254 Note that calling any method (even inquiries) on a closed stream is 255 undefined. Implementations may raise OSError in this case. 256 """ 257 258 ### Internal ### 259 260 def _unsupported(self, name): 261 """Internal: raise an OSError exception for unsupported operations.""" 262 raise UnsupportedOperation(f"{self.__class__.__name__}.{name}() not supported") 263 264 ### Positioning ### 265 266 def seek(self, pos, whence=0): 267 """Change stream position. 268 269 Change the stream position to byte offset pos. Argument pos is 270 interpreted relative to the position indicated by whence. Values 271 for whence are ints: 272 273 * SEEK_SET=0 -- start of stream (the default); offset should be zero or 274 positive 275 * SEEK_CUR=1 -- current stream position; offset may be negative 276 * SEEK_END=2 -- end of stream; offset is usually negative 277 Some operating systems / file systems could provide additional values. 278 279 Return an int indicating the new absolute position. 280 """ 281 self._unsupported("seek") 282 283 def tell(self): 284 """Return an int indicating the current stream position.""" 285 return self.seek(0, 1) 286 287 def truncate(self, pos=None): 288 """Truncate file to size bytes. 289 290 Size defaults to the current IO position as reported by tell(). Return 291 the new size. 292 """ 293 self._unsupported("truncate") 294 295 ### Flush and close ### 296 297 def flush(self): 298 """Flush write buffers, if applicable. 299 300 This is not implemented for read-only and non-blocking streams. 301 """ 302 self._checkClosed() 303 304 def close(self): 305 """Flush and close the IO object. 306 307 This method has no effect if the file is already closed. 308 309 Note that calling any method (even inquiries) on a closed stream is 310 undefined. Implementations may raise OSError in this case. 311 """ 312 if not self._closed: 313 try: 314 self.flush() 315 finally: 316 self._closed = True 317 318 ### Inquiries ### 319 320 def seekable(self): 321 """Return a bool indicating whether object supports random access. 322 323 If False, seek(), tell() and truncate() will raise OSError. 324 This method may need to do a test seek(). 325 """ 326 return False 327 328 def _checkSeekable(self, msg=None): 329 """Internal: raise UnsupportedOperation if file is not seekable""" 330 if not self.seekable(): 331 raise UnsupportedOperation( 332 "File or stream is not seekable." if msg is None else msg 333 ) 334 335 def readable(self): 336 """Return a bool indicating whether object was opened for reading. 337 338 If False, read() will raise OSError. 339 """ 340 return False 341 342 def _checkReadable(self, msg=None): 343 """Internal: raise UnsupportedOperation if file is not readable""" 344 if not self.readable(): 345 raise UnsupportedOperation( 346 "File or stream is not readable." if msg is None else msg 347 ) 348 349 def writable(self): 350 """Return a bool indicating whether object was opened for writing. 351 352 If False, write() and truncate() will raise OSError. 353 """ 354 return False 355 356 def _checkWritable(self, msg=None): 357 """Internal: raise UnsupportedOperation if file is not writable""" 358 if not self.writable(): 359 raise UnsupportedOperation( 360 "File or stream is not writable." if msg is None else msg 361 ) 362 363 @property 364 def closed(self): 365 """closed: bool. True iff the file has been closed. 366 367 For backwards compatibility, this is a property, not a predicate. 368 """ 369 return self._closed 370 371 def _checkClosed(self, msg=None): 372 """Internal: raise a ValueError if file is closed""" 373 if self.closed: 374 raise ValueError("I/O operation on closed file." if msg is None else msg) 375 376 ### Context manager ### 377 378 def __enter__(self): # That's a forward reference 379 """Context management protocol. Returns self (an instance of IOBase). 380 381 IOBase supports the :keyword:`with` statement. In this example, fp 382 is closed after the suite of the with statement is complete: 383 384 with open('spam.txt', 'r') as fp: 385 fp.write('Spam and eggs!') 386 """ 387 self._checkClosed() 388 return self 389 390 def __exit__(self, *args): 391 """Context management protocol. Calls close()""" 392 self.close() 393 394 ### Lower-level APIs ### 395 396 def fileno(self): 397 """Returns underlying file descriptor (an int) if one exists. 398 399 An OSError is raised if the IO object does not use a file descriptor. 400 """ 401 self._unsupported("fileno") 402 403 def isatty(self): 404 """Return a bool indicating whether this is an 'interactive' stream. 405 406 Return False if it can't be determined. 407 """ 408 self._checkClosed() 409 return False 410 411 ### Readline[s] and writelines ### 412 413 def _peek_readahead(self, size): 414 readahead = self.peek(1) 415 if not readahead: 416 return 1 417 n = (readahead.find(b"\n") + 1) or len(readahead) 418 if size >= 0: 419 # TODO(T47866758): Use less generic code to do this computation 420 # since all of the types are known ahead of time. 421 n = min(n, size) 422 return n 423 424 def _const_readahead(self, size): 425 return 1 426 427 def readline(self, size=-1): 428 r"""Read and return a line of bytes from the stream. 429 430 If size is specified, at most size bytes will be read. 431 Size should be an int. 432 433 The line terminator is always b'\n' for binary files; for text 434 files, the newlines argument to open can be used to select the line 435 terminator(s) recognized. 436 """ 437 if hasattr(self, "peek"): 438 nreadahead = self._peek_readahead 439 else: 440 nreadahead = self._const_readahead 441 442 if size is None: 443 size = -1 444 elif not _int_check(size): 445 raise TypeError("size must be an integer") 446 res = bytearray() 447 while size < 0 or len(res) < size: 448 b = self.read(nreadahead(size)) 449 if not b: 450 break 451 res += b 452 if res.endswith(b"\n"): 453 break 454 return bytes(res) 455 456 def __iter__(self): 457 """IOBase (and its subclasses) support the iterator protocol, meaning 458 that an IOBase object can be iterated over yielding the lines in a 459 stream. 460 """ 461 self._checkClosed() 462 return self 463 464 def __next__(self): 465 line = self.readline() 466 if not line: 467 raise StopIteration 468 return line 469 470 def readlines(self, hint=None): 471 """Return a list of lines from the stream. 472 473 hint can be specified to control the number of lines read: no more 474 lines will be read if the total size (in bytes/characters) of all 475 lines so far exceeds hint. 476 """ 477 if hint is None or hint <= 0: 478 return list(self) 479 n = 0 480 lines = [] 481 for line in self: 482 lines.append(line) 483 n += len(line) 484 if n >= hint: 485 break 486 return lines 487 488 def writelines(self, lines): 489 self._checkClosed() 490 for line in lines: 491 self.write(line) 492 493 494class _TextIOBase(_IOBase, bootstrap=True): 495 def read(self, size=-1): 496 self._unsupported("read") 497 498 def write(self, s): 499 self._unsupported("write") 500 501 def readline(self): 502 self._unsupported("readline") 503 504 def detach(self): 505 self._unsupported("detach") 506 507 @property 508 def encoding(self): 509 return None 510 511 @property 512 def newlines(self): 513 return None 514 515 @property 516 def errors(self): 517 return None 518 519 520class _RawIOBase(_IOBase, bootstrap=True): 521 """Base class for raw binary I/O.""" 522 523 def read(self, size=-1): 524 """Read and return up to size bytes, where size is an int. 525 526 Returns an empty bytes object on EOF, or None if the object is 527 set not to block and has no data to read. 528 """ 529 if size < 0: 530 return self.readall() 531 # TODO(T47866758): This should use a mutablebytes or some other data 532 # structure to avoid so much copying and so many round-trips. Consider: 533 # 1. We create a bytearray 534 # 2. We pass that to readinto 535 # 3. readinto calls native code 536 # 4. Native code allocates some native memory to write into 537 # 5. Native code copies that native stuff out into the byte array 538 # 6. The byte array is copied out into bytes here 539 # Very slow. 540 b = bytearray(size.__index__()) 541 n = self.readinto(b) 542 if n is None: 543 return None 544 del b[n:] 545 return bytes(b) 546 547 def readall(self): 548 """Read until EOF, using multiple read() call.""" 549 res = bytearray() 550 while True: 551 data = self.read(DEFAULT_BUFFER_SIZE) 552 # data could be b'' or None 553 if not data: 554 break 555 # TODO(T47866758): This is a really sub-par readall that could 556 # stress the GC with large I/O operations. We really want a rope or 557 # similar data structure here. 558 res += data 559 if res: 560 return bytes(res) 561 # b'' or None 562 return data 563 564 def readinto(self, b): 565 """Read bytes into a pre-allocated bytes-like object b. 566 567 Returns an int representing the number of bytes read (0 for EOF), or 568 None if the object is set not to block and has no data to read. 569 """ 570 raise NotImplementedError("readinto") 571 572 def write(self, b): 573 """Write the given buffer to the IO stream. 574 575 Returns the number of bytes written, which may be less than the 576 length of b in bytes. 577 """ 578 raise NotImplementedError("write") 579 580 581class _BufferedIOBase(_IOBase, bootstrap=True): 582 """Base class for buffered IO objects. 583 584 The main difference with RawIOBase is that the read() method 585 supports omitting the size argument, and does not have a default 586 implementation that defers to readinto(). 587 588 In addition, read(), readinto() and write() may raise 589 BlockingIOError if the underlying raw stream is in non-blocking 590 mode and not ready; unlike their raw counterparts, they will never 591 return None. 592 593 A typical implementation should not inherit from a RawIOBase 594 implementation, but wrap one. 595 """ 596 597 def read(self, size=None): 598 """Read and return up to size bytes, where size is an int. 599 600 If the argument is omitted, None, or negative, reads and 601 returns all data until EOF. 602 603 If the argument is positive, and the underlying raw stream is 604 not 'interactive', multiple raw reads may be issued to satisfy 605 the byte count (unless EOF is reached first). But for 606 interactive raw streams, at most one raw read will be issued, and a 607 short result does not imply that EOF is imminent. 608 609 Returns an empty bytes array on EOF. 610 611 Raises BlockingIOError if the underlying raw stream has no 612 data at the moment. 613 """ 614 self._unsupported("read") 615 616 def read1(self, size=None): 617 """Read up to size bytes with at most one read() system call, 618 where size is an int. 619 """ 620 self._unsupported("read1") 621 622 def readinto(self, b): 623 """Read bytes into a pre-allocated bytes-like object b. 624 625 Like read(), this may issue multiple reads to the underlying raw 626 stream, unless the latter is 'interactive'. 627 628 Returns an int representing the number of bytes read (0 for EOF). 629 630 Raises BlockingIOError if the underlying raw stream has no 631 data at the moment. 632 """ 633 634 return self._readinto(b, read1=False) 635 636 def readinto1(self, b): 637 """Read bytes into buffer *b*, using at most one system call 638 639 Returns an int representing the number of bytes read (0 for EOF). 640 641 Raises BlockingIOError if the underlying raw stream has no 642 data at the moment. 643 """ 644 645 return self._readinto(b, read1=True) 646 647 def _readinto(self, b, read1): 648 if not _memoryview_check(b): 649 b = memoryview(b) 650 # TODO(emacs): Here and throughout this file, come up with a better 651 # buffer / byteslike name than "b" 652 b = b.cast("B") 653 654 if read1: 655 data = self.read1(len(b)) 656 else: 657 data = self.read(len(b)) 658 n = len(data) 659 660 # TODO(T47880928): Implement memoryview.__setitem__ and 661 # _memoryview_setslice so we can use that here 662 b[:n] = data 663 664 return n 665 666 def write(self, b): 667 """Write the given bytes buffer to the IO stream. 668 669 Return the number of bytes written, which is always the length of b 670 in bytes. 671 672 Raises BlockingIOError if the buffer is full and the 673 underlying raw stream cannot accept more data at the moment. 674 """ 675 self._unsupported("write") 676 677 def detach(self): 678 """ 679 Separate the underlying raw stream from the buffer and return it. 680 681 After the raw stream has been detached, the buffer is in an unusable 682 state. 683 """ 684 self._unsupported("detach") 685 686 687class _BufferedIOMixin(_BufferedIOBase, bootstrap=True): 688 def __init__(self, raw): 689 self._raw = raw 690 691 def __repr__(self): 692 try: 693 name = self.name 694 except Exception: 695 return f"<{_type_name(self.__class__)}>" 696 else: 697 return f"<{_type_name(self.__class__)} name={name!r}>" 698 699 ### Positioning ### 700 701 def seek(self, pos, whence=0): 702 _detached_guard(self) 703 _whence_guard(whence) 704 new_position = self.raw.seek(pos, whence) 705 if new_position < 0: 706 raise OSError("seek() returned an invalid position") 707 return new_position 708 709 def tell(self): 710 _detached_guard(self) 711 pos = self.raw.tell() 712 if pos < 0: 713 raise OSError("tell() returned an invalid position") 714 return pos 715 716 def truncate(self, pos=None): 717 # Flush the stream. We're mixing buffered I/O with lower-level I/O, 718 # and a flush may be necessary to synch both views of the current 719 # file state. 720 self.flush() 721 722 if pos is None: 723 pos = self.tell() 724 return self.raw.truncate(pos) 725 726 ### Flush and close ### 727 728 def flush(self): 729 if self.closed: 730 raise ValueError("flush of closed file") 731 self.raw.flush() 732 733 def close(self): 734 if not self.closed: 735 try: 736 # may raise BlockingIOError or BrokenPipeError etc 737 self.flush() 738 finally: 739 self.raw.close() 740 741 def detach(self): 742 _detached_guard(self) 743 self.flush() 744 raw = self._raw 745 self._raw = None 746 return raw 747 748 ### Inquiries ### 749 750 def readable(self): 751 _detached_guard(self) 752 return self.raw.readable() 753 754 def seekable(self): 755 _detached_guard(self) 756 return self.raw.seekable() 757 758 def writable(self): 759 _detached_guard(self) 760 return self.raw.writable() 761 762 @property 763 def raw(self): 764 return self._raw 765 766 @property 767 def closed(self): 768 _detached_guard(self) 769 return self.raw.closed 770 771 @property 772 def name(self): 773 _detached_guard(self) 774 return self.raw.name 775 776 @property 777 def mode(self): 778 _detached_guard(self) 779 return self.raw.mode 780 781 ### Lower-level APIs ### 782 783 def fileno(self): 784 _detached_guard(self) 785 return self.raw.fileno() 786 787 def isatty(self): 788 _detached_guard(self) 789 return self.raw.isatty() 790 791 792class BufferedRWPair(_BufferedIOBase, metaclass=_non_heaptype): 793 def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE): 794 if not reader.readable(): 795 raise UnsupportedOperation('"reader" argument must be readable.') 796 797 if not writer.writable(): 798 raise UnsupportedOperation('"writer" argument must be writable.') 799 800 self.reader = BufferedReader(reader, buffer_size) 801 self.writer = BufferedWriter(writer, buffer_size) 802 803 def close(self): 804 try: 805 self.writer.close() 806 finally: 807 self.reader.close() 808 809 @property 810 def closed(self): 811 return self.writer.closed 812 813 def flush(self): 814 return self.writer.flush() 815 816 def isatty(self): 817 return self.writer.isatty() or self.reader.isatty() 818 819 def peek(self, size=0): 820 return self.reader.peek(size) 821 822 def read(self, size=None): 823 if size is None: 824 size = -1 825 return self.reader.read(size) 826 827 def read1(self, size): 828 return self.reader.read1(size) 829 830 def readable(self): 831 return self.reader.readable() 832 833 def readinto(self, b): 834 return self.reader.readinto(b) 835 836 def readinto1(self, b): 837 return self.reader.readinto1(b) 838 839 def writable(self): 840 return self.writer.writable() 841 842 def write(self, b): 843 return self.writer.write(b) 844 845 846class BufferedRandom(_BufferedIOMixin, bootstrap=True): 847 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): 848 if not raw.seekable(): 849 raise UnsupportedOperation("File or stream is not seekable.") 850 if not raw.writable(): 851 raise UnsupportedOperation("File or stream is not writable.") 852 853 _BufferedIOMixin.__init__(self, raw) 854 if buffer_size <= 0: 855 raise ValueError("buffer size must be strictly positive") 856 self._reader = BufferedReader(raw, buffer_size) 857 self.buffer_size = buffer_size 858 self._write_lock = _thread_Lock() 859 self._write_buf = bytearray() # TODO(T47880928): use a memoryview 860 861 def _flush_unlocked(self): 862 return BufferedWriter._flush_unlocked(self) 863 864 def _readinto(self, buf, read1): 865 return self._reader.readinto(buf, read1) 866 867 def close(self): 868 _BufferedIOMixin.close(self) 869 self._reader.close() 870 871 def detach(self): 872 raw = _BufferedIOMixin.detach(self) 873 self._reader.detach() 874 return raw 875 876 def flush(self): 877 with self._write_lock: 878 self._flush_unlocked() 879 880 def peek(self, size=0): 881 if self.closed: 882 raise ValueError("peek of closed file") 883 self.flush() 884 return _buffered_reader_peek(self._reader, size) 885 886 def read(self, size=None): 887 if self.closed: 888 raise ValueError("read of closed file") 889 self.flush() 890 return _buffered_reader_read(self._reader, size) 891 892 def readline(self, size=None): 893 if self.closed: 894 raise ValueError("read of closed file") 895 self.flush() 896 return _buffered_reader_readline(self._reader, size) 897 898 def read1(self, size): 899 if self.closed: 900 raise ValueError("read of closed file") 901 self.flush() 902 return self._reader.read1(size) 903 904 def readinto(self, b): 905 self.flush() 906 return self._reader.readinto(b) 907 908 def readinto1(self, b): 909 self.flush() 910 return self._reader.readinto1(b) 911 912 def seek(self, pos, whence=0): 913 if self.closed: 914 raise ValueError("seek of closed file") 915 self.flush() 916 return self._reader.seek(pos, whence) 917 918 def tell(self): 919 if self._write_buf: 920 return BufferedWriter.tell(self) 921 return self._reader.tell() 922 923 def truncate(self, pos=None): 924 return BufferedWriter.truncate(self, self.tell() if pos is None else pos) 925 926 def write(self, b): 927 if self.closed: 928 raise ValueError("write to closed file") 929 # reset read buffer 930 reader = self._reader 931 _buffered_reader_clear_buffer(reader) 932 return BufferedWriter.write(self, b) 933 934 935class BufferedReader(_BufferedIOMixin, bootstrap=True): 936 def close(self): 937 _buffered_reader_clear_buffer(self) 938 return _BufferedIOMixin.close(self) 939 940 def detach(self): 941 _buffered_reader_clear_buffer(self) 942 return _BufferedIOMixin.detach(self) 943 944 peek = _buffered_reader_peek 945 946 read = _buffered_reader_read 947 948 def read1(self, size=-1): 949 if size < 0: 950 size = self._buffer_size 951 if size == 0: 952 return b"" 953 # Returns up to size bytes. If at least one byte is buffered, we only 954 # return buffered bytes. Otherwise, we do one raw read. 955 _buffered_reader_peek(self, 1) 956 available = self._buffer_num_bytes - self._read_pos 957 return _buffered_reader_read(self, min(size, available)) 958 959 readline = _buffered_reader_readline 960 961 def seek(self, pos, whence=0): 962 _whence_guard(whence) 963 if whence == 1: 964 pos -= self._buffer_num_bytes - self._read_pos 965 _buffered_reader_clear_buffer(self) 966 return _BufferedIOMixin.seek(self, pos, whence) 967 968 def tell(self): 969 return _BufferedIOMixin.tell(self) - self._buffer_num_bytes + self._read_pos 970 971 # TODO(emacs): Write an optimized _readinto 972 973 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): 974 if not raw.readable(): 975 raise UnsupportedOperation("File or stream is not readable.") 976 977 _BufferedIOMixin.__init__(self, raw) 978 buffer_size = _index(buffer_size) 979 if buffer_size <= 0: 980 raise ValueError("invalid buffer size") 981 _buffered_reader_init(self, buffer_size) 982 983 984class BufferedWriter(_BufferedIOMixin, bootstrap=True): 985 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): 986 if not raw.writable(): 987 raise UnsupportedOperation("File or stream is not writable.") 988 989 _BufferedIOMixin.__init__(self, raw) 990 if buffer_size <= 0: 991 raise ValueError("buffer size must be strictly positive") 992 self.buffer_size = buffer_size 993 self._write_buf = bytearray() # TODO(T47880928): use a memoryview 994 self._write_lock = _thread_Lock() 995 996 def _flush_unlocked(self): 997 if self.closed: 998 raise ValueError("flush of closed file") 999 while self._write_buf: 1000 try: 1001 n = self.raw.write(self._write_buf) 1002 except BlockingIOError: 1003 raise RuntimeError( 1004 "self.raw should implement RawIOBase: " 1005 "it should not raise BlockingIOError" 1006 ) 1007 if n is None: 1008 raise BlockingIOError( 1009 errno_EAGAIN, "write could not complete without blocking", 0 1010 ) 1011 if n < 0 or n > _bytearray_len(self._write_buf): 1012 raise IOError( 1013 f"raw write() returned invalid length {n} (should have " 1014 f"been between 0 and {_bytearray_len(self._write_buf)})" 1015 ) 1016 del self._write_buf[:n] 1017 1018 def flush(self): 1019 with self._write_lock: 1020 self._flush_unlocked() 1021 1022 def seek(self, pos, whence=0): 1023 _whence_guard(whence) 1024 with self._write_lock: 1025 self._flush_unlocked() 1026 return _BufferedIOMixin.seek(self, pos, whence) 1027 1028 def tell(self): 1029 return _BufferedIOMixin.tell(self) + _bytearray_len(self._write_buf) 1030 1031 def truncate(self, pos=None): 1032 with self._write_lock: 1033 self._flush_unlocked() 1034 if pos is None: 1035 pos = self.raw.tell() 1036 return self.raw.truncate(pos) 1037 1038 def writable(self): 1039 return self.raw.writable() 1040 1041 def write(self, b): 1042 if self.closed: 1043 raise ValueError("write to closed file") 1044 if _str_check(b): 1045 raise TypeError("can't write str to binary stream") 1046 with self._write_lock: 1047 if _bytearray_len(self._write_buf) > self.buffer_size: 1048 # We're full, so let's pre-flush the buffer. (This may raise 1049 # BlockingIOError with characters_written == 0.) 1050 self._flush_unlocked() 1051 before = _bytearray_len(self._write_buf) 1052 self._write_buf.extend(b) 1053 written = _bytearray_len(self._write_buf) - before 1054 if _bytearray_len(self._write_buf) > self.buffer_size: 1055 try: 1056 self._flush_unlocked() 1057 except BlockingIOError as e: 1058 if _bytearray_len(self._write_buf) > self.buffer_size: 1059 # We've hit the buffer_size. We have to accept a partial 1060 # write and cut back our buffer. 1061 overage = _bytearray_len(self._write_buf) - self.buffer_size 1062 written -= overage 1063 self._write_buf = self._write_buf[: self.buffer_size] 1064 raise BlockingIOError(e.errno, e.strerror, written) 1065 return written 1066 1067 1068class BytesIO(_BufferedIOBase, bootstrap=True): 1069 """Buffered I/O implementation using an in-memory bytes buffer.""" 1070 1071 def __init__(self, initial_bytes=None): 1072 _builtin() 1073 1074 def __getstate__(self): 1075 _unimplemented("BytesIO.__getstate__") 1076 # if self.closed: 1077 # raise ValueError("__getstate__ on closed file") 1078 # return (self.getvalue(), self._pos, self.__dict__.copy()) 1079 1080 def getvalue(self): 1081 """Return the bytes value (contents) of the buffer""" 1082 _builtin() 1083 1084 def getbuffer(self): 1085 """Return a readable and writable view of the buffer.""" 1086 _BytesIO_closed_guard(self) 1087 return memoryview(self._buffer) 1088 1089 def close(self): 1090 _BytesIO_guard(self) 1091 self._closed = True 1092 self._buffer = None 1093 1094 def read(self, size=-1): 1095 _builtin() 1096 1097 read1 = read 1098 1099 def write(self, b): 1100 _builtin() 1101 1102 def seek(self, pos, whence=0): 1103 result = _BytesIO_seek(self, pos, whence) 1104 if result is not _Unbound: 1105 return result 1106 return _BytesIO_seek(self, _index(pos), _obj_as_int(whence)) 1107 1108 def tell(self): 1109 _BytesIO_closed_guard(self) 1110 return self._pos 1111 1112 def truncate(self, pos=None): 1113 if pos is None: 1114 return _BytesIO_truncate(self, pos) 1115 return _BytesIO_truncate(self, _index(pos)) 1116 1117 def readable(self): 1118 _BytesIO_closed_guard(self) 1119 return True 1120 1121 def writable(self): 1122 _BytesIO_closed_guard(self) 1123 return True 1124 1125 def seekable(self): 1126 _BytesIO_closed_guard(self) 1127 return True 1128 1129 @property 1130 def closed(self): 1131 return self._closed 1132 1133 1134class FileIO(_RawIOBase, bootstrap=True): 1135 def __init__(self, file, mode="r", closefd=True, opener=None): # noqa: C901 1136 if _float_check(file): 1137 raise TypeError("integer argument expected, got float") 1138 fd = -1 1139 if _int_check(file): 1140 if file < 0: 1141 raise ValueError("negative file descriptor") 1142 fd = file 1143 1144 if not _str_check(mode): 1145 raise TypeError(f"invalid mode for FileIO: {mode!s}") 1146 mode_set = frozenset(mode) 1147 if not mode_set <= frozenset("xrwab+"): 1148 raise ValueError(f"invalid mode: {mode!s}") 1149 # Is mode non empty, with exactly one of r, w, a, or x, and maybe a + 1150 # i.e. it should match [rwax]\+? 1151 if sum(c in "rwax" for c in mode) != 1 or mode.count("+") > 1: 1152 raise ValueError( 1153 "Must have exactly one of create/read/write/append " 1154 "mode and at most one plus" 1155 ) 1156 1157 appending = False 1158 created = False 1159 closed = False 1160 readable = False 1161 seekable = None 1162 writable = False 1163 1164 if "x" in mode: 1165 created = True 1166 writable = True 1167 elif "r" in mode: 1168 readable = True 1169 elif "w" in mode: 1170 writable = True 1171 elif "a" in mode: 1172 writable = True 1173 appending = True 1174 1175 if "+" in mode: 1176 readable = True 1177 writable = True 1178 1179 flags = _os_parse_mode(mode) 1180 self.name = file 1181 1182 # TODO(T86943617): call sys.audit 1183 1184 if fd < 0: 1185 # file was not an int, so we have to open it 1186 if not closefd: 1187 raise ValueError("Cannot use closefd=False with file name") 1188 if opener is None: 1189 fd = _os_open(file, flags, 0o666) 1190 else: 1191 fd = opener(file, flags) 1192 if not _int_check(fd): 1193 raise TypeError("expected integer from opener") 1194 if fd < 0: 1195 raise ValueError(f"opener returned {fd}") 1196 1197 try: 1198 if opener: 1199 _os_set_noinheritable(fd) 1200 1201 if _os_isdir(fd): 1202 raise IsADirectoryError(errno_EISDIR, "Is a directory") 1203 1204 # TODO(T52792779): Don't translate newlines if _setmode is non-None 1205 # by setting O_BINARY 1206 1207 if appending: 1208 # For consistent behavior, we explicitly seek to the end of 1209 # file (otherwise, it might be done only on the first write()). 1210 _os_lseek(fd, 0, 2) 1211 except Exception: 1212 _os_close(fd) 1213 raise 1214 1215 self._fd = fd 1216 self._closed = closed 1217 self._closefd = closefd 1218 self._appending = appending 1219 self._created = created 1220 self._readable = readable 1221 self._seekable = seekable 1222 self._writable = writable 1223 1224 def __del__(self): 1225 if not self.closed and self._closefd: 1226 _warn(f"unclosed file {self!r}", ResourceWarning, stacklevel=2, source=self) 1227 self.close() 1228 1229 def __repr__(self): 1230 class_name = f"_io.{self.__class__.__qualname__}" 1231 if self.closed: 1232 return f"<{class_name} [closed]>" 1233 try: 1234 name = self.name 1235 except AttributeError: 1236 return ( 1237 f"<{class_name} name={self._fd} " 1238 f"mode={self.mode!r} closefd={self._closefd!r}>" 1239 ) 1240 else: 1241 return ( 1242 f"<{class_name} name={name!r} " 1243 f"mode={self.mode!r} closefd={self._closefd!r}>" 1244 ) 1245 1246 def _checkReadable(self): 1247 if not self._readable: 1248 raise UnsupportedOperation("File not open for reading") 1249 1250 def _checkWritable(self, msg=None): 1251 if not self._writable: 1252 raise UnsupportedOperation("File not open for writing") 1253 1254 def read(self, size=None): 1255 self._checkClosed() 1256 self._checkReadable() 1257 if size is None or size < 0: 1258 return FileIO.readall(self) 1259 try: 1260 return _os_read(self._fd, size) 1261 except BlockingIOError: 1262 return None 1263 1264 def readall(self): 1265 _builtin() 1266 1267 def readinto(self, byteslike): 1268 _builtin() 1269 1270 def write(self, byteslike): 1271 self._checkClosed() 1272 self._checkWritable() 1273 buf = byteslike 1274 if not _byteslike_check(byteslike): 1275 if not _object_type_hasattr(byteslike, "__buffer__"): 1276 raise TypeError( 1277 "a bytes-like object is required, not " 1278 f"'{_type(byteslike).__name__}'" 1279 ) 1280 try: 1281 buf = byteslike.__buffer__() 1282 except Exception: 1283 raise TypeError( 1284 "a bytes-like object is required, not " 1285 f"'{_type(byteslike).__name__}'" 1286 ) 1287 if not _bytes_check(buf): 1288 raise TypeError( 1289 "a bytes-like object is required, not " 1290 f"'{_type(byteslike).__name__}'" 1291 ) 1292 try: 1293 return _os_write(self._fd, buf) 1294 except BlockingIOError: 1295 return None 1296 1297 def seek(self, pos, whence=0): 1298 _whence_guard(whence) 1299 if _float_check(pos): 1300 raise TypeError("an integer is required") 1301 self._checkClosed() 1302 return _os_lseek(self._fd, pos, whence) 1303 1304 def tell(self): 1305 self._checkClosed() 1306 return _os_lseek(self._fd, 0, 1) 1307 1308 def truncate(self, size=None): 1309 self._checkClosed() 1310 self._checkWritable() 1311 if size is None: 1312 size = self.tell() 1313 _os_ftruncate(self._fd, size) 1314 return size 1315 1316 def close(self): 1317 if not self.closed: 1318 try: 1319 if self._closefd: 1320 _os_close(self._fd) 1321 finally: 1322 _RawIOBase.close(self) 1323 self._fd = -1 1324 1325 def seekable(self): 1326 self._checkClosed() 1327 if self._seekable is None: 1328 try: 1329 FileIO.tell(self) 1330 self._seekable = True 1331 except OSError: 1332 self._seekable = False 1333 return self._seekable 1334 1335 def readable(self): 1336 self._checkClosed() 1337 return self._readable 1338 1339 def writable(self): 1340 self._checkClosed() 1341 return self._writable 1342 1343 def fileno(self): 1344 self._checkClosed() 1345 return self._fd 1346 1347 def isatty(self): 1348 self._checkClosed() 1349 return _os_isatty(self._fd) 1350 1351 @property 1352 def closefd(self): 1353 return self._closefd 1354 1355 @property 1356 def mode(self): 1357 if self._created: 1358 if self._readable: 1359 return "xb+" 1360 else: 1361 return "xb" 1362 elif self._appending: 1363 if self._readable: 1364 return "ab+" 1365 else: 1366 return "ab" 1367 elif self._readable: 1368 if self._writable: 1369 return "rb+" 1370 else: 1371 return "rb" 1372 else: 1373 return "wb" 1374 1375 1376class TextIOWrapper(_TextIOBase, bootstrap=True): 1377 _CHUNK_SIZE = 2048 1378 1379 # The write_through argument has no effect here since this 1380 # implementation always writes through. The argument is present only 1381 # so that the signature can match the signature of the C version. 1382 def __init__( # noqa: C901 1383 self, 1384 buffer, 1385 encoding=None, 1386 errors=None, 1387 newline=None, 1388 line_buffering=False, 1389 write_through=False, 1390 ): 1391 # Argument parsing happens first in CPython's _io module 1392 if encoding is None: 1393 encoding = "UTF-8" 1394 1395 elif not _str_check(encoding): 1396 raise TypeError( 1397 "TextIOWrapper() argument 2 must be str or None, not " 1398 f"{_type(encoding).__name__}" 1399 ) 1400 if errors is None: 1401 errors = "strict" 1402 elif not _str_check(errors): 1403 raise TypeError( 1404 "TextIOWrapper() argument 'errors' must be str or None, not " 1405 f"{_type(errors).__name__}" 1406 ) 1407 if newline is not None and not _str_check(newline): 1408 raise TypeError( 1409 "TextIOWrapper() argument 4 must be str or None, not " 1410 f"{_type(newline).__name__}" 1411 ) 1412 if line_buffering is None: 1413 line_buffering = False 1414 elif not _int_check(line_buffering): 1415 raise TypeError( 1416 f"an integer is required (got type {_type(line_buffering).__name__})" 1417 ) 1418 if write_through is None: 1419 write_through = False 1420 elif not _int_check(write_through): 1421 raise TypeError( 1422 f"an integer is required (got type {_type(write_through).__name__})" 1423 ) 1424 1425 if newline not in (None, "", "\n", "\r", "\r\n"): 1426 raise ValueError(f"illegal newline value: {newline}") 1427 1428 self._buffer = buffer 1429 self._line_buffering = bool(line_buffering) 1430 self._encoding = encoding 1431 self._errors = errors 1432 self._readuniversal = not newline 1433 self._readtranslate = newline is None 1434 self._readnl = newline 1435 self._writetranslate = newline != "" 1436 self._writenl = newline or _os_linesep 1437 self._decoder = self._get_decoder() if buffer.readable() else None 1438 self._encoder = self._get_encoder() if buffer.writable() else None 1439 self._decoded_chars = "" # buffer for text returned from decoder 1440 self._decoded_chars_used = 0 # offset into _decoded_chars for read() 1441 self._snapshot = None # info for reconstructing decoder state 1442 self._seekable = self._telling = buffer.seekable() 1443 self._has_read1 = hasattr(buffer, "read1") 1444 self._b2cratio = 0.0 1445 1446 if self._seekable and self._encoder: 1447 position = self.buffer.tell() 1448 if position != 0: 1449 self._encoder.setstate(0) 1450 1451 def __next__(self): 1452 _TextIOWrapper_attached_guard(self) 1453 self._telling = False 1454 line = self.readline() 1455 if not _str_check(line): 1456 raise IOError( 1457 "readline() should have returned a str object, not " 1458 f"'{_type(line).__name__}'" 1459 ) 1460 if not line: 1461 self._snapshot = None 1462 self._telling = self._seekable 1463 raise StopIteration 1464 return line 1465 1466 # self._snapshot is either None, or a tuple (dec_flags, next_input) 1467 # where dec_flags is the second (integer) item of the decoder state 1468 # and next_input is the chunk of input bytes that comes next after the 1469 # snapshot point. We use this to reconstruct decoder states in tell(). 1470 1471 # Naming convention: 1472 # - "bytes_..." for integer variables that count input bytes 1473 # - "chars_..." for integer variables that count decoded characters 1474 1475 def __repr__(self): 1476 try: 1477 name_component = f" name={self.name!r}" 1478 except Exception: 1479 name_component = "" 1480 try: 1481 mode_component = "" if self.mode is None else f" mode={self.mode!r}" 1482 except Exception: 1483 mode_component = "" 1484 return ( 1485 f"<_io.TextIOWrapper{name_component}" 1486 f"{mode_component} encoding={self._encoding!r}>" 1487 ) 1488 1489 def _get_decoded_chars(self, n=None): 1490 offset = self._decoded_chars_used 1491 if n is None: 1492 chars = self._decoded_chars[offset:] 1493 else: 1494 chars = self._decoded_chars[offset : offset + n] 1495 self._decoded_chars_used += len(chars) 1496 return chars 1497 1498 def _get_decoder(self): 1499 make_decoder = _codecs_getincrementaldecoder(self._encoding) 1500 decoder = make_decoder(self._errors) 1501 if self._readuniversal: 1502 return IncrementalNewlineDecoder(decoder, self._readtranslate) 1503 return decoder 1504 1505 def _get_encoder(self): 1506 make_encoder = _codecs_getincrementalencoder(self._encoding) 1507 return make_encoder(self._errors) 1508 1509 def _pack_cookie( 1510 self, position, dec_flags=0, bytes_to_feed=0, need_eof=0, chars_to_skip=0 1511 ): 1512 # The meaning of a tell() cookie is: seek to position, set the 1513 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them 1514 # into the decoder with need_eof as the EOF flag, then skip 1515 # chars_to_skip characters of the decoded result. For most simple 1516 # decoders, tell() will often just give a byte offset in the file. 1517 return ( 1518 position 1519 | (dec_flags << 64) 1520 | (bytes_to_feed << 128) 1521 | (chars_to_skip << 192) 1522 | bool(need_eof) << 256 1523 ) 1524 1525 def _read_chunk(self): 1526 # The return value is True unless EOF was reached. The decoded 1527 # string is placed in self._decoded_chars (replacing its previous 1528 # value). The entire input chunk is sent to the decoder, though 1529 # some of it may remain buffered in the decoder, yet to be 1530 # converted. 1531 1532 if self._decoder is None: 1533 raise UnsupportedOperation("not readable") 1534 1535 if self._telling: 1536 # To prepare for tell(), we need to snapshot a point in the 1537 # file where the decoder's input buffer is empty. 1538 1539 dec_buffer, dec_flags = self._decoder.getstate() 1540 # Given this, we know there was a valid snapshot point 1541 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags). 1542 if not _bytes_check(dec_buffer): 1543 raise TypeError( 1544 "illegal decoder state: the first item should be a bytes " 1545 f"object, not '{_type(dec_buffer).__name__}'" 1546 ) 1547 1548 # Read a chunk, decode it, and put the result in self._decoded_chars. 1549 if self._has_read1: 1550 input_chunk = self.buffer.read1(self._CHUNK_SIZE) 1551 else: 1552 input_chunk = self.buffer.read(self._CHUNK_SIZE) 1553 eof = not input_chunk 1554 decoded_chars = self._decoder.decode(input_chunk, eof) 1555 self._set_decoded_chars(decoded_chars) 1556 if decoded_chars: 1557 self._b2cratio = len(input_chunk) / len(self._decoded_chars) 1558 else: 1559 self._b2cratio = 0.0 1560 1561 if self._telling: 1562 # At the snapshot point, len(dec_buffer) bytes before the read, 1563 # the next input to be decoded is dec_buffer + input_chunk. 1564 self._snapshot = (dec_flags, dec_buffer + input_chunk) 1565 1566 return not eof 1567 1568 def _rewind_decoded_chars(self, n): 1569 if self._decoded_chars_used < n: 1570 raise AssertionError("rewind decoded_chars out of bounds") 1571 self._decoded_chars_used -= n 1572 1573 # The following three methods implement an ADT for _decoded_chars. 1574 # Text returned from the decoder is buffered here until the client 1575 # requests it by calling our read() or readline() method. 1576 def _set_decoded_chars(self, chars): 1577 self._decoded_chars = chars 1578 self._decoded_chars_used = 0 1579 1580 def _unpack_cookie(self, bigint): 1581 rest, position = divmod(bigint, 1 << 64) 1582 rest, dec_flags = divmod(rest, 1 << 64) 1583 rest, bytes_to_feed = divmod(rest, 1 << 64) 1584 need_eof, chars_to_skip = divmod(rest, 1 << 64) 1585 return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip 1586 1587 @property 1588 def buffer(self): 1589 return self._buffer 1590 1591 def close(self): 1592 _TextIOWrapper_attached_guard(self) 1593 if not self.closed: 1594 try: 1595 self.flush() 1596 finally: 1597 self._buffer.close() 1598 1599 @property 1600 def closed(self): 1601 _TextIOWrapper_attached_guard(self) 1602 return self._buffer.closed 1603 1604 def detach(self): 1605 _TextIOWrapper_attached_guard(self) 1606 self.flush() 1607 buffer = self._buffer 1608 self._buffer = None 1609 return buffer 1610 1611 @property 1612 def encoding(self): 1613 return self._encoding 1614 1615 @property 1616 def errors(self): 1617 return self._errors 1618 1619 def fileno(self): 1620 _TextIOWrapper_attached_guard(self) 1621 return self._buffer.fileno() 1622 1623 def flush(self): 1624 result = _TextIOWrapper_attached_closed_guard(self) 1625 if result is _Unbound: 1626 self._checkClosed() 1627 self.buffer.flush() 1628 self._telling = self._seekable 1629 1630 def isatty(self): 1631 _TextIOWrapper_attached_guard(self) 1632 return self.buffer.isatty() 1633 1634 @property 1635 def line_buffering(self): 1636 return self._line_buffering 1637 1638 @property 1639 def name(self): 1640 _TextIOWrapper_attached_guard(self) 1641 return self._buffer.name 1642 1643 @property 1644 def newlines(self): 1645 _TextIOWrapper_attached_guard(self) 1646 if self._decoder is None: 1647 return None 1648 try: 1649 return self._decoder.newlines 1650 except AttributeError: 1651 return None 1652 1653 def read(self, size=None): 1654 if size is None: 1655 size = -1 1656 elif not _int_check(size): 1657 raise TypeError(f"integer argument expected, got '{_type(size).__name__}'") 1658 1659 result = _TextIOWrapper_attached_closed_guard(self) 1660 if result is _Unbound: 1661 self._checkClosed() 1662 self._checkReadable("not readable") 1663 1664 decoder = self._decoder 1665 try: 1666 size.__index__ 1667 except AttributeError as err: 1668 raise TypeError("an integer is required") from err 1669 if size < 0: 1670 # Read everything. 1671 result = self._get_decoded_chars() + decoder.decode( 1672 self._buffer.read(), final=True 1673 ) 1674 self._set_decoded_chars("") 1675 self._snapshot = None 1676 return result 1677 else: 1678 # Keep reading chunks until we have size characters to return. 1679 eof = False 1680 result = self._get_decoded_chars(size) 1681 while len(result) < size and not eof: 1682 eof = not self._read_chunk() 1683 result += self._get_decoded_chars(size - len(result)) 1684 return result 1685 1686 def readable(self): 1687 _TextIOWrapper_attached_guard(self) 1688 return self._buffer.readable() 1689 1690 def readline(self, size=None): # noqa: C901 1691 result = _TextIOWrapper_attached_closed_guard(self) 1692 if result is _Unbound: 1693 self._checkClosed() 1694 1695 if size is None: 1696 size = -1 1697 elif not _int_check(size): 1698 size = _index(size) 1699 1700 # Grab all the decoded text (we will rewind any extra bits later). 1701 line = self._get_decoded_chars() 1702 1703 start = 0 1704 1705 pos = endpos = None 1706 while True: 1707 if self._readtranslate: 1708 # Newlines are already translated, only search for \n 1709 pos = line.find("\n", start) 1710 if pos >= 0: 1711 endpos = pos + 1 1712 break 1713 else: 1714 start = len(line) 1715 1716 elif self._readuniversal: 1717 # Universal newline search. Find any of \r, \r\n, \n 1718 # The decoder ensures that \r\n are not split in two pieces 1719 nlpos = line.find("\n", start) 1720 crpos = line.find("\r", start) 1721 if crpos == -1: 1722 if nlpos == -1: 1723 # Nothing found 1724 start = len(line) 1725 else: 1726 # Found \n 1727 endpos = nlpos + 1 1728 break 1729 elif nlpos == -1: 1730 # Found lone \r 1731 endpos = crpos + 1 1732 break 1733 elif nlpos < crpos: 1734 # Found \n 1735 endpos = nlpos + 1 1736 break 1737 elif nlpos == crpos + 1: 1738 # Found \r\n 1739 endpos = crpos + 2 1740 break 1741 else: 1742 # Found \r 1743 endpos = crpos + 1 1744 break 1745 else: 1746 # non-universal 1747 pos = line.find(self._readnl) 1748 if pos >= 0: 1749 endpos = pos + len(self._readnl) 1750 break 1751 1752 if size >= 0 and len(line) >= size: 1753 endpos = size # reached length size 1754 break 1755 1756 # No line ending seen yet - get more data' 1757 while self._read_chunk(): 1758 if self._decoded_chars: 1759 break 1760 if self._decoded_chars: 1761 line += self._get_decoded_chars() 1762 else: 1763 # end of file 1764 self._set_decoded_chars("") 1765 self._snapshot = None 1766 return line 1767 1768 if size >= 0 and endpos > size: 1769 endpos = size # don't exceed size 1770 1771 # Rewind _decoded_chars to just after the line ending we found. 1772 self._rewind_decoded_chars(len(line) - endpos) 1773 return line[:endpos] 1774 1775 def _reset_encoder(self, position): 1776 if self._encoder: 1777 if position != 0: 1778 self._encoder.setstate(0) 1779 else: 1780 self._encoder.reset() 1781 1782 def seek(self, cookie, whence=0): # noqa: C901 1783 if not _int_check(whence): 1784 raise TypeError( 1785 f"an integer is required (got type {_type(whence).__name__})" 1786 ) 1787 result = _TextIOWrapper_attached_closed_seekable_guard(self) 1788 if result is _Unbound: 1789 self._checkClosed() 1790 self._checkSeekable("underlying stream is not seekable") 1791 1792 if whence == 1: # seek relative to current position 1793 if cookie != 0: 1794 raise UnsupportedOperation("can't do nonzero cur-relative seeks") 1795 # Seeking to the current position should attempt to 1796 # sync the underlying buffer with the current position. 1797 whence = 0 1798 cookie = self.tell() 1799 elif whence == 2: # seek relative to end of file 1800 if cookie != 0: 1801 raise UnsupportedOperation("can't do nonzero end-relative seeks") 1802 self.flush() 1803 position = self.buffer.seek(0, 2) 1804 self._set_decoded_chars("") 1805 self._snapshot = None 1806 if self._decoder: 1807 self._decoder.reset() 1808 self._reset_encoder(position) 1809 return position 1810 elif whence != 0: 1811 raise ValueError(f"invalid whence ({whence}, should be 0, 1 or 2)") 1812 if cookie < 0: 1813 raise ValueError(f"negative seek position {cookie!r}") 1814 self.flush() 1815 1816 # The strategy of seek() is to go back to the safe start point 1817 # and replay the effect of read(chars_to_skip) from there. 1818 unpacked = self._unpack_cookie(cookie) 1819 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = unpacked 1820 # Seek back to the safe start point. 1821 self.buffer.seek(start_pos) 1822 self._set_decoded_chars("") 1823 self._snapshot = None 1824 1825 # Restore the decoder to its state from the safe start point. 1826 if cookie == 0 and self._decoder: 1827 self._decoder.reset() 1828 elif self._decoder or dec_flags or chars_to_skip: 1829 self._decoder = self._decoder 1830 self._decoder.setstate((b"", dec_flags)) 1831 self._snapshot = (dec_flags, b"") 1832 1833 if chars_to_skip: 1834 # Just like _read_chunk, feed the decoder and save a snapshot. 1835 input_chunk = self.buffer.read(bytes_to_feed) 1836 self._set_decoded_chars(self._decoder.decode(input_chunk, need_eof)) 1837 self._snapshot = (dec_flags, input_chunk) 1838 1839 # Skip chars_to_skip of the decoded characters. 1840 if len(self._decoded_chars) < chars_to_skip: 1841 raise OSError("can't restore logical file position") 1842 self._decoded_chars_used = chars_to_skip 1843 1844 self._reset_encoder(cookie) 1845 return cookie 1846 1847 def seekable(self): 1848 _TextIOWrapper_attached_guard(self) 1849 return self._buffer.seekable() 1850 1851 def tell(self): # noqa: C901 1852 result = _TextIOWrapper_attached_closed_seekable_guard(self) 1853 if result is _Unbound: 1854 self._checkClosed() 1855 self._checkSeekable("underlying stream is not seekable") 1856 if not self._telling: 1857 raise OSError("telling position disabled by next() call") 1858 self.flush() 1859 position = self.buffer.tell() 1860 decoder = self._decoder 1861 if decoder is None or self._snapshot is None: 1862 if self._decoded_chars: 1863 # This should never happen. 1864 raise AssertionError("pending decoded text") 1865 return position 1866 1867 # Skip backward to the snapshot point (see _read_chunk). 1868 dec_flags, next_input = self._snapshot 1869 position -= len(next_input) 1870 1871 # How many decoded characters have been used up since the snapshot? 1872 chars_to_skip = self._decoded_chars_used 1873 if chars_to_skip == 0: 1874 # We haven't moved from the snapshot point. 1875 return self._pack_cookie(position, dec_flags) 1876 1877 # Starting from the snapshot position, we will walk the decoder 1878 # forward until it gives us enough decoded characters. 1879 saved_state = decoder.getstate() 1880 try: 1881 # Fast search for an acceptable start point, close to our 1882 # current pos. 1883 # Rationale: calling decoder.decode() has a large overhead 1884 # regardless of chunk size; we want the number of such calls to 1885 # be O(1) in most situations (common decoders, non-crazy input). 1886 # Actually, it will be exactly 1 for fixed-size codecs (all 1887 # 8-bit codecs, also UTF-16 and UTF-32). 1888 skip_bytes = int(self._b2cratio * chars_to_skip) 1889 skip_back = 1 1890 assert skip_bytes <= len(next_input) 1891 while skip_bytes > 0: 1892 decoder.setstate((b"", dec_flags)) 1893 # Decode up to temptative start point 1894 n = len(decoder.decode(next_input[:skip_bytes])) 1895 if n <= chars_to_skip: 1896 b, d = decoder.getstate() 1897 if not b: 1898 # Before pos and no bytes buffered in decoder => OK 1899 dec_flags = d 1900 chars_to_skip -= n 1901 break 1902 # Skip back by buffered amount and reset heuristic 1903 skip_bytes -= len(b) 1904 skip_back = 1 1905 else: 1906 # We're too far ahead, skip back a bit 1907 skip_bytes -= skip_back 1908 skip_back = skip_back * 2 1909 else: 1910 skip_bytes = 0 1911 decoder.setstate((b"", dec_flags)) 1912 1913 # Note our initial start point. 1914 start_pos = position + skip_bytes 1915 start_flags = dec_flags 1916 if chars_to_skip == 0: 1917 # We haven't moved from the start point. 1918 return self._pack_cookie(start_pos, start_flags) 1919 1920 # Feed the decoder one byte at a time. As we go, note the 1921 # nearest "safe start point" before the current location 1922 # (a point where the decoder has nothing buffered, so seek() 1923 # can safely start from there and advance to this location). 1924 bytes_fed = 0 1925 need_eof = 0 1926 # Chars decoded since `start_pos` 1927 chars_decoded = 0 1928 for i in range(skip_bytes, len(next_input)): 1929 bytes_fed += 1 1930 chars_decoded += len(decoder.decode(next_input[i : i + 1])) 1931 dec_buffer, dec_flags = decoder.getstate() 1932 if not dec_buffer and chars_decoded <= chars_to_skip: 1933 # Decoder buffer is empty, so this is a safe start point. 1934 start_pos += bytes_fed 1935 chars_to_skip -= chars_decoded 1936 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 1937 if chars_decoded >= chars_to_skip: 1938 break 1939 else: 1940 # We didn't get enough decoded data; signal EOF to get more. 1941 chars_decoded += len(decoder.decode(b"", final=True)) 1942 need_eof = 1 1943 if chars_decoded < chars_to_skip: 1944 raise OSError("can't reconstruct logical file position") 1945 1946 # The returned cookie corresponds to the last safe start point. 1947 return self._pack_cookie( 1948 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip 1949 ) 1950 finally: 1951 decoder.setstate(saved_state) 1952 1953 def truncate(self, pos=None): 1954 _TextIOWrapper_attached_guard(self) 1955 self.flush() 1956 return self.buffer.truncate(pos) 1957 1958 def writable(self): 1959 _TextIOWrapper_attached_guard(self) 1960 return self.buffer.writable() 1961 1962 def write(self, text): 1963 result = _TextIOWrapper_write_UTF8(self, text) 1964 if result is not _Unbound: 1965 return result 1966 1967 if not _str_check(text): 1968 raise TypeError(f"write() argument must be str, not {_type(text).__name__}") 1969 result = _TextIOWrapper_attached_closed_guard(self) 1970 if result is _Unbound: 1971 self._checkClosed() 1972 length = _str_len(text) 1973 haslf = (self._writetranslate or self._line_buffering) and "\n" in text 1974 if haslf and self._writetranslate and self._writenl != "\n": 1975 text = text.replace("\n", self._writenl) 1976 encoder = self._encoder 1977 b = encoder.encode(text) 1978 self.buffer.write(b) 1979 if self._line_buffering and (haslf or "\r" in text): 1980 self.flush() 1981 self._set_decoded_chars("") 1982 self._snapshot = None 1983 if self._decoder: 1984 self._decoder.reset() 1985 return length 1986 1987 1988class StringIO(_TextIOBase, bootstrap=True): 1989 def __init__(self, initial_value="", newline="\n"): 1990 _builtin() 1991 1992 def __next__(self): 1993 _builtin() 1994 1995 def __repr__(self): 1996 return f"<_io.StringIO object at {_address(self):#x}>" 1997 1998 @property 1999 def encoding(self): 2000 return None 2001 2002 @property 2003 def errors(self): 2004 return None 2005 2006 def getvalue(self): 2007 _builtin() 2008 2009 def readable(self): 2010 _StringIO_closed_guard(self) 2011 return True 2012 2013 def seekable(self): 2014 _StringIO_closed_guard(self) 2015 return True 2016 2017 def writable(self): 2018 _StringIO_closed_guard(self) 2019 return True 2020 2021 def close(self): 2022 _builtin() 2023 2024 @property 2025 def closed(self): 2026 return self._closed 2027 2028 @property 2029 def line_buffering(self): 2030 _StringIO_closed_guard(self) 2031 return False 2032 2033 @property 2034 def newlines(self): 2035 if self._readtranslate is None: 2036 return None 2037 return ( 2038 None, 2039 "\n", 2040 "\r", 2041 ("\r", "\n"), 2042 "\r\n", 2043 ("\n", "\r\n"), 2044 ("\r", "\r\n"), 2045 ("\r", "\n", "\r\n"), 2046 )[self._seennl] 2047 2048 def read(self, size=None): 2049 _builtin() 2050 2051 def readline(self, size=None): 2052 _builtin() 2053 2054 def seek(self, offset, whence=0): 2055 result = _StringIO_seek(self, offset, whence) 2056 if result is not _Unbound: 2057 return result 2058 return _StringIO_seek(self, _index(offset), _obj_as_int(whence)) 2059 2060 def tell(self): # noqa: C901 2061 _StringIO_closed_guard(self) 2062 return self._pos 2063 2064 def truncate(self, size=None): 2065 _builtin() 2066 2067 def write(self, value): 2068 _builtin() 2069 2070 2071def _fspath(obj): 2072 if _str_check(obj) or _bytes_check(obj): 2073 return obj 2074 dunder_fspath = _object_type_getattr(obj, "__fspath__") 2075 if dunder_fspath is _Unbound: 2076 raise TypeError("expected str, bytes, or os.PathLike object") 2077 result = dunder_fspath() 2078 if _str_check(result) or _bytes_check(result): 2079 return result 2080 raise TypeError("expected __fspath__ to return str or bytes") 2081 2082 2083def open( # noqa: C901 2084 file, 2085 mode="r", 2086 buffering=-1, 2087 encoding=None, 2088 errors=None, 2089 newline=None, 2090 closefd=True, 2091 opener=None, 2092): 2093 if not _int_check(file): 2094 file = _fspath(file) 2095 if not _str_check(file) and not _bytes_check(file) and not _int_check(file): 2096 # TODO(emacs): Is this check necessary? os.fspath guarantees str/bytes, 2097 # above check guarantees int or str or bytes 2098 raise TypeError("invalid file: %r" % file) 2099 if not _str_check(mode): 2100 raise TypeError(f"open() argument 2 must be str, not {_type(mode).__name__}") 2101 if not _int_check(buffering): 2102 raise TypeError( 2103 f"an integer is required (got type {_type(buffering).__name__})" 2104 ) 2105 if encoding is not None and not _str_check(encoding): 2106 raise TypeError( 2107 f"open() argument 4 must be str or None, not {_type(encoding).__name__}" 2108 ) 2109 if errors is not None and not _str_check(errors): 2110 raise TypeError( 2111 f"open() argument 5 must be str or None, not {_type(errors).__name__}" 2112 ) 2113 modes = set(mode) 2114 if modes - set("axrwb+tU") or len(mode) > len(modes): 2115 raise ValueError("invalid mode: %r" % mode) 2116 creating = "x" in modes 2117 reading = "r" in modes 2118 writing = "w" in modes 2119 appending = "a" in modes 2120 updating = "+" in modes 2121 text = "t" in modes 2122 binary = "b" in modes 2123 if "U" in modes: 2124 if creating or writing or appending or updating: 2125 raise ValueError("mode U cannot be combined with 'x', 'w', 'a', or '+'") 2126 _warn("'U' mode is deprecated", DeprecationWarning, 2) 2127 reading = True 2128 if text and binary: 2129 raise ValueError("can't have text and binary mode at once") 2130 if creating + reading + writing + appending > 1: 2131 raise ValueError("must have exactly one of create/read/write/append mode") 2132 if not (creating or reading or writing or appending): 2133 raise ValueError( 2134 "Must have exactly one of create/read/write/append mode and at " 2135 "most one plus" 2136 ) 2137 if binary and encoding is not None: 2138 raise ValueError("binary mode doesn't take an encoding argument") 2139 if binary and errors is not None: 2140 raise ValueError("binary mode doesn't take an errors argument") 2141 if binary and newline is not None: 2142 raise ValueError("binary mode doesn't take a newline argument") 2143 if binary and buffering == 1: 2144 _warn( 2145 "line buffering (buffering=1) isn't supported in " 2146 "binary mode, the default buffer size will be used", 2147 RuntimeWarning, 2148 2, 2149 ) 2150 raw = FileIO( 2151 file, 2152 (creating and "x" or "") 2153 + (reading and "r" or "") 2154 + (writing and "w" or "") 2155 + (appending and "a" or "") 2156 + (updating and "+" or ""), 2157 closefd, 2158 opener=opener, 2159 ) 2160 result = raw 2161 try: 2162 line_buffering = False 2163 if buffering == 1 or buffering < 0 and raw.isatty(): 2164 buffering = -1 2165 line_buffering = True 2166 if buffering < 0: 2167 buffering = DEFAULT_BUFFER_SIZE 2168 if buffering == 0: 2169 if binary: 2170 return result 2171 raise ValueError("can't have unbuffered text I/O") 2172 if updating: 2173 buffer = BufferedRandom(raw, buffering) 2174 elif creating or writing or appending: 2175 buffer = BufferedWriter(raw, buffering) 2176 elif reading: 2177 buffer = BufferedReader(raw, buffering) 2178 else: 2179 raise ValueError("unknown mode: %r" % mode) 2180 result = buffer 2181 if binary: 2182 return result 2183 text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering) 2184 result = text 2185 text.mode = mode 2186 return result 2187 except Exception: 2188 result.close() 2189 raise 2190 2191 2192def open_code(path): 2193 _str_guard(path) 2194 return open(path, "rb")