this repo has no description
1#!/usr/bin/env python3
2# Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
3"""The io module provides the Python interfaces to stream handling. The
4builtin open function is defined in this module.
5
6At the top of the I/O hierarchy is the abstract base class IOBase. It
7defines the basic interface to a stream. Note, however, that there is no
8separation between reading and writing to streams; implementations are
9allowed to raise an IOError if they do not support a given operation.
10
11Extending IOBase is RawIOBase which deals simply with the reading and
12writing of raw bytes to a stream. FileIO subclasses RawIOBase to provide
13an interface to OS files.
14
15BufferedIOBase deals with buffering on a raw byte stream (RawIOBase). Its
16subclasses, BufferedWriter, BufferedReader, and BufferedRWPair buffer
17streams that are readable, writable, and both respectively.
18BufferedRandom provides a buffered interface to random access
19streams. BytesIO is a simple stream of in-memory bytes.
20
21Another IOBase subclass, TextIOBase, deals with the encoding and decoding
22of streams into text. TextIOWrapper, which extends it, is a buffered text
23interface to a buffered raw stream (`BufferedIOBase`). Finally, StringIO
24is an in-memory stream for text.
25
26Argument names are not part of the specification, and only the arguments
27of open() are intended to be used as keyword arguments."""
28
29import builtins # noqa: F401
30from _codecs import (
31 getincrementaldecoder as _codecs_getincrementaldecoder,
32 getincrementalencoder as _codecs_getincrementalencoder,
33)
34from _thread import LockType as _thread_Lock
35from builtins import BlockingIOError, _index, _non_heaptype, _obj_as_int, _type_name
36from errno import EAGAIN as errno_EAGAIN, EISDIR as errno_EISDIR
37
38from _builtins import (
39 _address,
40 _builtin,
41 _bytearray_len,
42 _bytes_check,
43 _byteslike_check,
44 _float_check,
45 _int_check,
46 _memoryview_check,
47 _object_type_getattr,
48 _object_type_hasattr,
49 _os_write,
50 _str_check,
51 _str_guard,
52 _str_len,
53 _type,
54 _Unbound,
55 _unimplemented,
56 _warn,
57)
58from _os import (
59 close as _os_close,
60 fstat_size as _os_fstat_size,
61 ftruncate as _os_ftruncate,
62 isatty as _os_isatty,
63 isdir as _os_isdir,
64 linesep as _os_linesep,
65 lseek as _os_lseek,
66 open as _os_open,
67 parse_mode as _os_parse_mode,
68 read as _os_read,
69 set_noinheritable as _os_set_noinheritable,
70)
71
72
# Default buffer size, in bytes. Used as the default `buffer_size` argument of
# the buffered stream classes in this file and as the chunk size requested by
# _RawIOBase.readall().
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
74
75
def _BytesIO_closed_guard(obj):
    # The body is only the `_builtin()` marker, so the behavior is presumably
    # supplied natively by the runtime -- TODO confirm.
    # BytesIO.getbuffer/tell/readable/writable/seekable call this before doing
    # anything else; presumably it rejects a closed BytesIO -- verify.
    _builtin()
78
79
def _BytesIO_guard(obj):
    # The body is only the `_builtin()` marker, so the behavior is presumably
    # supplied natively by the runtime -- TODO confirm.
    # Called at the top of BytesIO.close(); presumably a type check on `obj`.
    _builtin()
82
83
def _BytesIO_seek(self, offset, whence):
    # Intrinsic stub: the body is only the `_builtin()` marker, so the
    # behavior is presumably supplied natively by the runtime -- TODO confirm.
    # BytesIO.seek() calls this positionally and treats a `_Unbound` result
    # as "arguments not handled, retry after coercion".
    #
    # This stub used to be declared twice in a row (once with `pos`, once
    # with `offset`); the later declaration silently replaced the earlier
    # one, so only that effective signature is kept.
    _builtin()


def _BytesIO_truncate(self, pos):
    # Intrinsic stub (see the `_builtin()` marker); called by
    # BytesIO.truncate() with either None or an index-coerced position.
    _builtin()
94
95
def _StringIO_closed_guard(obj):
    # Intrinsic stub: the body is only the `_builtin()` marker, so the
    # behavior is presumably supplied natively by the runtime -- TODO confirm.
    # No caller is visible in this part of the file.
    _builtin()
98
99
def _StringIO_seek(self, offset, whence):
    # Intrinsic stub: the body is only the `_builtin()` marker, so the
    # behavior is presumably supplied natively by the runtime -- TODO confirm.
    # No caller is visible in this part of the file.
    _builtin()
102
103
def _TextIOWrapper_attached_guard(obj):
    # Intrinsic stub: the body is only the `_builtin()` marker, so the
    # behavior is presumably supplied natively by the runtime -- TODO confirm.
    # No caller is visible in this part of the file.
    _builtin()
106
107
def _TextIOWrapper_attached_closed_guard(obj):
    # Intrinsic stub: the body is only the `_builtin()` marker, so the
    # behavior is presumably supplied natively by the runtime -- TODO confirm.
    # No caller is visible in this part of the file.
    _builtin()
110
111
def _TextIOWrapper_attached_closed_seekable_guard(obj):
    # Intrinsic stub: the body is only the `_builtin()` marker, so the
    # behavior is presumably supplied natively by the runtime -- TODO confirm.
    # No caller is visible in this part of the file.
    _builtin()
114
115
def _TextIOWrapper_write_UTF8(self, text):
    # Intrinsic stub: the body is only the `_builtin()` marker, so the
    # behavior is presumably supplied natively by the runtime -- TODO confirm.
    # No caller is visible in this part of the file.
    _builtin()
118
119
def _buffered_reader_clear_buffer(self):
    # Intrinsic stub: the body is only the `_builtin()` marker, so the
    # behavior is presumably supplied natively by the runtime -- TODO confirm.
    # Called by BufferedReader.close/detach/seek and BufferedRandom.write to
    # discard the reader's buffered data.
    _builtin()
122
123
def _buffered_reader_init(self, buffer_size):
    # Intrinsic stub: the body is only the `_builtin()` marker, so the
    # behavior is presumably supplied natively by the runtime -- TODO confirm.
    # BufferedReader.__init__ calls this with a validated, positive
    # buffer_size.
    _builtin()
126
127
def _buffered_reader_peek(self, size=0):
    # Intrinsic stub: the body is only the `_builtin()` marker, so the
    # behavior is presumably supplied natively by the runtime -- TODO confirm.
    # Bound directly as BufferedReader.peek and used by BufferedReader.read1
    # and BufferedRandom.peek.
    _builtin()
130
131
def _buffered_reader_read(self, size=None):
    # Intrinsic stub: the body is only the `_builtin()` marker, so the
    # behavior is presumably supplied natively by the runtime -- TODO confirm.
    # Bound directly as BufferedReader.read and used by BufferedReader.read1
    # and BufferedRandom.read.
    _builtin()
134
135
def _buffered_reader_readline(self, size=None):
    # Intrinsic stub: the body is only the `_builtin()` marker, so the
    # behavior is presumably supplied natively by the runtime -- TODO confirm.
    # Bound directly as BufferedReader.readline and used by
    # BufferedRandom.readline.
    _builtin()
138
139
140def _detached_guard(self):
141 if self.raw is None:
142 raise ValueError("raw stream has been detached")
143
144
145def _whence_guard(whence):
146 if whence == 0 or whence == 1 or whence == 2:
147 return
148 raise ValueError("invalid whence value")
149
150
class IncrementalNewlineDecoder(bootstrap=True):
    """Wrap an incremental decoder, tracking and optionally translating newlines.

    `decoder` may be None, in which case input is passed through undecoded.
    When `translate` is true, "\\r\\n" and lone "\\r" in the output become "\\n".
    The kinds of newline seen so far are exposed through `newlines`.
    """

    # Bit flags accumulated in `_seennl`, one per newline flavour.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors="strict"):
        if not _int_check(translate):
            raise TypeError(
                f"an integer is required (got type {_type(translate).__name__})"
            )
        self._errors = errors
        self._translate = translate
        self._decoder = decoder
        self._seennl = 0
        self._pendingcr = False

    def decode(self, input, final=False):
        """Decode `input`, returning text with newline bookkeeping applied."""
        if not _int_check(final):
            raise TypeError(
                f"an integer is required (got type {_type(final).__name__})"
            )
        wrapped = self._decoder
        if wrapped is None:
            text = input
        else:
            text = wrapped.decode(input, final=bool(final))

        # Re-attach a "\r" that was held back by the previous call.
        if self._pendingcr and (text or final):
            text = "\r" + text
            self._pendingcr = False

        # Hold back a trailing "\r" (even when not translating) so that a
        # following "\n" is seen together with it in one pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self._pendingcr = True

        # Remember which newline flavours occurred in this chunk.
        crlf_count = text.count("\r\n")
        cr_count = text.count("\r") - crlf_count
        lf_count = text.count("\n") - crlf_count
        seen = self._seennl
        if lf_count:
            seen |= self._LF
        if cr_count:
            seen |= self._CR
        if crlf_count:
            seen |= self._CRLF
        self._seennl = seen

        if self._translate:
            if crlf_count:
                text = text.replace("\r\n", "\n")
            if cr_count:
                text = text.replace("\r", "\n")
        return text

    def getstate(self):
        """Return (buffer, flag); bit 0 of flag records a held-back "\\r"."""
        wrapped = self._decoder
        if wrapped is None:
            buf, flag = b"", 0
        else:
            buf, flag = wrapped.getstate()
        flag <<= 1
        if self._pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        buf, flag = state
        self._pendingcr = bool(flag & 1)
        wrapped = self._decoder
        if wrapped is not None:
            wrapped.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and any held-back "\\r"; reset the wrapped decoder."""
        self._seennl = 0
        self._pendingcr = False
        wrapped = self._decoder
        if wrapped is not None:
            wrapped.reset()

    @property
    def newlines(self):
        # Indexed by the `_seennl` bit mask (_LF=1, _CR=2, _CRLF=4).
        by_mask = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_mask[self._seennl]
236
237
class UnsupportedOperation(OSError, ValueError):
    """Raised when a stream is asked to perform an operation it lacks."""
240
241
class _IOBase(bootstrap=True):
    """Root of the I/O class hierarchy; models a stream of bytes.

    Instances are not meant to be created directly. Every operation has a
    default implementation that subclasses override as needed; taken
    together, the defaults describe a stream that can be neither read,
    written nor seeked.

    Binary data is exchanged as bytes. Other bytes-like objects are accepted
    as arguments, and some methods (readinto, for instance) need a writable
    one.

    Using a stream after it has been closed, even for inquiries, is
    undefined; implementations may raise OSError.
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: report that operation `name` is not available here."""
        message = f"{self.__class__.__name__}.{name}() not supported"
        raise UnsupportedOperation(message)

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Move the stream position and return the new absolute position.

        `pos` is a byte offset interpreted relative to `whence`:

        * SEEK_SET=0 -- from the start of the stream (default); `pos` should
          be zero or positive
        * SEEK_CUR=1 -- from the current position; `pos` may be negative
        * SEEK_END=2 -- from the end of the stream; `pos` is usually negative

        Some platforms may define further values. The base implementation
        is unsupported.
        """
        self._unsupported("seek")

    def tell(self):
        """Return the current stream position as an int."""
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Resize the file to `pos` bytes and return the new size.

        `pos` defaults to the current position as given by tell(). The base
        implementation is unsupported.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers where that makes sense.

        The base implementation only verifies that the stream is open.
        """
        self._checkClosed()

    def close(self):
        """Flush and close the stream; closing twice is harmless.

        The stream is marked closed even when the flush fails.
        """
        if self._closed:
            return
        try:
            self.flush()
        finally:
            self._closed = True

    ### Inquiries ###

    def seekable(self):
        """Return whether the stream supports random access.

        When False, seek(), tell() and truncate() raise OSError.
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation unless the stream is seekable."""
        if self.seekable():
            return
        if msg is None:
            msg = "File or stream is not seekable."
        raise UnsupportedOperation(msg)

    def readable(self):
        """Return whether the stream was opened for reading.

        When False, read() raises OSError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation unless the stream is readable."""
        if self.readable():
            return
        if msg is None:
            msg = "File or stream is not readable."
        raise UnsupportedOperation(msg)

    def writable(self):
        """Return whether the stream was opened for writing.

        When False, write() and truncate() raise OSError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation unless the stream is writable."""
        if self.writable():
            return
        if msg is None:
            msg = "File or stream is not writable."
        raise UnsupportedOperation(msg)

    @property
    def closed(self):
        """True once the stream has been closed.

        Kept as a property rather than a method for backwards compatibility.
        """
        return self._closed

    def _checkClosed(self, msg=None):
        """Internal: raise ValueError if the stream is closed."""
        if not self.closed:
            return
        if msg is None:
            msg = "I/O operation on closed file."
        raise ValueError(msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Enter a `with` block; returns the stream itself.

        The stream is closed when the block ends:

        with open('spam.txt', 'w') as fp:
            fp.write('Spam and eggs!')
        """
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Leave a `with` block by closing the stream."""
        self.close()

    ### Lower-level APIs ###

    def fileno(self):
        """Return the underlying file descriptor, if there is one.

        The base implementation is unsupported.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return whether the stream is interactive (False if unknown)."""
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def _peek_readahead(self, size):
        # How many bytes readline() should request next, judged from what
        # peek() shows: up to and including the first newline, otherwise
        # everything peeked; capped at `size` when `size` is non-negative.
        peeked = self.peek(1)
        if not peeked:
            return 1
        newline_index = peeked.find(b"\n")
        if newline_index >= 0:
            count = newline_index + 1
        else:
            count = len(peeked)
        if size >= 0:
            # TODO(T47866758): Use less generic code to do this computation
            # since all of the types are known ahead of time.
            count = min(count, size)
        return count

    def _const_readahead(self, size):
        # Without peek() there is no way to look ahead: read byte by byte.
        return 1

    def readline(self, size=-1):
        r"""Read one line of bytes from the stream and return it.

        At most `size` bytes are returned when `size` is a non-negative
        int; None or a negative value means no limit.

        Binary streams always end lines with b'\n'; for text files the
        newline argument of open() selects the recognized terminator(s).
        """
        if hasattr(self, "peek"):
            next_request = self._peek_readahead
        else:
            next_request = self._const_readahead

        if size is None:
            size = -1
        elif not _int_check(size):
            raise TypeError("size must be an integer")

        line = bytearray()
        while True:
            if size >= 0 and len(line) >= size:
                break
            chunk = self.read(next_request(size))
            if not chunk:
                break
            line += chunk
            if line.endswith(b"\n"):
                break
        return bytes(line)

    def __iter__(self):
        """Streams iterate over their lines; the iterator is the stream."""
        self._checkClosed()
        return self

    def __next__(self):
        line = self.readline()
        if line:
            return line
        raise StopIteration

    def readlines(self, hint=None):
        """Return the stream's lines as a list.

        With a positive `hint`, reading stops once the combined length of
        the lines collected reaches `hint`.
        """
        if hint is None or hint <= 0:
            return list(self)
        collected = []
        total = 0
        for line in self:
            collected.append(line)
            total += len(line)
            if total >= hint:
                break
        return collected

    def writelines(self, lines):
        """Write each item of `lines` with write(); no separators are added."""
        self._checkClosed()
        for item in lines:
            self.write(item)
492
493
class _TextIOBase(_IOBase, bootstrap=True):
    """Base class for text streams.

    Every operation defined here is unsupported and every metadata property
    reports None; subclasses override what they implement.
    """

    @property
    def encoding(self):
        return None

    @property
    def newlines(self):
        return None

    @property
    def errors(self):
        return None

    def read(self, size=-1):
        self._unsupported("read")

    def readline(self):
        self._unsupported("readline")

    def write(self, s):
        self._unsupported("write")

    def detach(self):
        self._unsupported("detach")
518
519
class _RawIOBase(_IOBase, bootstrap=True):
    """Base class for raw binary I/O."""

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        If size is None or negative, everything up to EOF is read via
        readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        # None means "no limit", the same way readline() and
        # BufferedRWPair.read() in this module treat it; without this check
        # the comparison below would raise TypeError for None.
        if size is None:
            size = -1
        if size < 0:
            return self.readall()
        # TODO(T47866758): This should use a mutablebytes or some other data
        # structure to avoid so much copying and so many round-trips. Consider:
        # 1. We create a bytearray
        # 2. We pass that to readinto
        # 3. readinto calls native code
        # 4. Native code allocates some native memory to write into
        # 5. Native code copies that native stuff out into the byte array
        # 6. The byte array is copied out into bytes here
        # Very slow.
        b = bytearray(size.__index__())
        n = self.readinto(b)
        if n is None:
            # readinto() reported "no data available right now".
            return None
        # Drop the unused tail of the pre-sized buffer.
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() calls.

        Returns the accumulated bytes. If nothing was accumulated, returns
        whatever the last read() produced (b'' or None).
        """
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            # data could be b'' or None
            if not data:
                break
            # TODO(T47866758): This is a really sub-par readall that could
            # stress the GC with large I/O operations. We really want a rope or
            # similar data structure here.
            res += data
        if res:
            return bytes(res)
        # b'' or None
        return data

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.

        The base implementation raises NotImplementedError.
        """
        raise NotImplementedError("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than the
        length of b in bytes.

        The base implementation raises NotImplementedError.
        """
        raise NotImplementedError("write")
579
580
class _BufferedIOBase(_IOBase, bootstrap=True):
    """Base class for buffered binary streams.

    Compared with RawIOBase, read() may be called without a size and has no
    default implementation built on readinto().

    read(), readinto() and write() may raise BlockingIOError when the
    underlying raw stream is non-blocking and not ready; unlike the raw
    methods they never return None.

    Implementations normally wrap a RawIOBase object instead of inheriting
    from one.
    """

    def read(self, size=None):
        """Read and return up to `size` bytes.

        An omitted, None or negative `size` reads everything up to EOF.

        For a positive `size` on a non-interactive raw stream, several raw
        reads may be issued until the count is met or EOF is reached. On an
        interactive raw stream at most one raw read is issued, so a short
        result does not mean EOF is near.

        Returns empty bytes at EOF; raises BlockingIOError if the raw
        stream currently has no data. The base implementation is
        unsupported.
        """
        self._unsupported("read")

    def read1(self, size=None):
        """Read up to `size` bytes using at most one raw read() call.

        The base implementation is unsupported.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Fill the writable bytes-like object `b` and return the byte count.

        Like read(), this may issue several raw reads unless the raw stream
        is interactive. Returns 0 at EOF; raises BlockingIOError if the raw
        stream currently has no data.
        """
        return self._readinto(b, read1=False)

    def readinto1(self, b):
        """Fill `b` using at most one system call and return the byte count.

        Returns 0 at EOF; raises BlockingIOError if the raw stream currently
        has no data.
        """
        return self._readinto(b, read1=True)

    def _readinto(self, b, read1):
        # Shared body of readinto()/readinto1(): view `b` as flat unsigned
        # bytes, read that many bytes with read1() or read(), copy them in.
        view = b if _memoryview_check(b) else memoryview(b)
        # TODO(emacs): Here and throughout this file, come up with a better
        # buffer / byteslike name than "b"
        view = view.cast("B")

        fetch = self.read1 if read1 else self.read
        data = fetch(len(view))
        count = len(data)

        # TODO(T47880928): Implement memoryview.__setitem__ and
        # _memoryview_setslice so we can use that here
        view[:count] = data

        return count

    def write(self, b):
        """Write the bytes buffer `b` and return the number of bytes written.

        The count always equals the length of `b`. Raises BlockingIOError
        when the buffer is full and the raw stream cannot take more data
        right now. The base implementation is unsupported.
        """
        self._unsupported("write")

    def detach(self):
        """Disconnect the raw stream from this buffer and return it.

        The buffer is unusable afterwards. The base implementation is
        unsupported.
        """
        self._unsupported("detach")
685
686
class _BufferedIOMixin(_BufferedIOBase, bootstrap=True):
    """Shared plumbing for buffered objects that wrap a single raw stream.

    Most methods delegate to `self.raw` after checking, via
    _detached_guard(), that the raw stream is still attached.
    """

    def __init__(self, raw):
        self._raw = raw

    def __repr__(self):
        cls_name = _type_name(self.__class__)
        try:
            stream_name = self.name
        except Exception:
            # No usable name (missing attribute, detached stream, ...).
            return f"<{cls_name}>"
        return f"<{cls_name} name={stream_name!r}>"

    ### Positioning ###

    def seek(self, pos, whence=0):
        _detached_guard(self)
        _whence_guard(whence)
        position = self.raw.seek(pos, whence)
        if position < 0:
            raise OSError("seek() returned an invalid position")
        return position

    def tell(self):
        _detached_guard(self)
        position = self.raw.tell()
        if position < 0:
            raise OSError("tell() returned an invalid position")
        return position

    def truncate(self, pos=None):
        # Buffered and raw I/O are being mixed here, so flush first to make
        # both agree on the current file state.
        self.flush()
        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        if self.closed:
            raise ValueError("flush of closed file")
        self.raw.flush()

    def close(self):
        if self.closed:
            return
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            # The raw stream is closed even when flushing failed.
            self.raw.close()

    def detach(self):
        _detached_guard(self)
        self.flush()
        detached, self._raw = self._raw, None
        return detached

    ### Inquiries ###

    def readable(self):
        _detached_guard(self)
        return self.raw.readable()

    def seekable(self):
        _detached_guard(self)
        return self.raw.seekable()

    def writable(self):
        _detached_guard(self)
        return self.raw.writable()

    @property
    def raw(self):
        return self._raw

    @property
    def closed(self):
        _detached_guard(self)
        return self.raw.closed

    @property
    def name(self):
        _detached_guard(self)
        return self.raw.name

    @property
    def mode(self):
        _detached_guard(self)
        return self.raw.mode

    ### Lower-level APIs ###

    def fileno(self):
        _detached_guard(self)
        return self.raw.fileno()

    def isatty(self):
        _detached_guard(self)
        return self.raw.isatty()
790
791
class BufferedRWPair(_BufferedIOBase, metaclass=_non_heaptype):
    """Buffered reader and writer combined into one object.

    Reads go to a BufferedReader wrapping `reader`; writes go to a
    BufferedWriter wrapping `writer`. The two halves are independent
    streams.
    """

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Wrap `reader` and `writer`, each with a buffer of `buffer_size`.

        Raises UnsupportedOperation if `reader` is not readable or `writer`
        is not writable.
        """
        if not reader.readable():
            raise UnsupportedOperation('"reader" argument must be readable.')

        if not writer.writable():
            raise UnsupportedOperation('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def close(self):
        """Close both halves; the reader is closed even if the writer fails."""
        try:
            self.writer.close()
        finally:
            self.reader.close()

    @property
    def closed(self):
        # Only the writer's state is consulted.
        return self.writer.closed

    def flush(self):
        return self.writer.flush()

    def isatty(self):
        return self.writer.isatty() or self.reader.isatty()

    def peek(self, size=0):
        return self.reader.peek(size)

    def read(self, size=None):
        """Read from the reader half; None means read until EOF."""
        if size is None:
            size = -1
        return self.reader.read(size)

    def read1(self, size=-1):
        """Read from the reader half with at most one raw read.

        `size` defaults to -1, matching BufferedReader.read1(), so that
        read1() can be called without arguments.
        """
        return self.reader.read1(size)

    def readable(self):
        return self.reader.readable()

    def readinto(self, b):
        return self.reader.readinto(b)

    def readinto1(self, b):
        return self.reader.readinto1(b)

    def writable(self):
        return self.writer.writable()

    def write(self, b):
        return self.writer.write(b)
844
845
class BufferedRandom(_BufferedIOMixin, bootstrap=True):
    """Buffered interface to a seekable, writable raw stream.

    Reading is delegated to an internal BufferedReader over the same raw
    stream; writing reuses BufferedWriter's implementation on this object's
    own write buffer. Read entry points flush pending writes first so the
    reader sees them.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Wrap `raw` with a buffer of `buffer_size` bytes.

        Raises UnsupportedOperation if `raw` is not seekable or not
        writable, and ValueError if `buffer_size` is not positive.
        """
        if not raw.seekable():
            raise UnsupportedOperation("File or stream is not seekable.")
        if not raw.writable():
            raise UnsupportedOperation("File or stream is not writable.")

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("buffer size must be strictly positive")
        self._reader = BufferedReader(raw, buffer_size)
        self.buffer_size = buffer_size
        self._write_lock = _thread_Lock()
        self._write_buf = bytearray()  # TODO(T47880928): use a memoryview

    def _flush_unlocked(self):
        return BufferedWriter._flush_unlocked(self)

    def _readinto(self, buf, read1):
        # Delegate to the reader's _readinto(): readinto() accepts only the
        # buffer, so passing `read1` to it raised TypeError. Flush first, as
        # every other read entry point of this class does.
        self.flush()
        return self._reader._readinto(buf, read1)

    def close(self):
        _BufferedIOMixin.close(self)
        self._reader.close()

    def detach(self):
        raw = _BufferedIOMixin.detach(self)
        self._reader.detach()
        return raw

    def flush(self):
        with self._write_lock:
            self._flush_unlocked()

    def peek(self, size=0):
        if self.closed:
            raise ValueError("peek of closed file")
        self.flush()
        return _buffered_reader_peek(self._reader, size)

    def read(self, size=None):
        if self.closed:
            raise ValueError("read of closed file")
        self.flush()
        return _buffered_reader_read(self._reader, size)

    def readline(self, size=None):
        if self.closed:
            raise ValueError("read of closed file")
        self.flush()
        return _buffered_reader_readline(self._reader, size)

    def read1(self, size=-1):
        # `size` defaults to -1, matching BufferedReader.read1().
        if self.closed:
            raise ValueError("read of closed file")
        self.flush()
        return self._reader.read1(size)

    def readinto(self, b):
        self.flush()
        return self._reader.readinto(b)

    def readinto1(self, b):
        self.flush()
        return self._reader.readinto1(b)

    def seek(self, pos, whence=0):
        if self.closed:
            raise ValueError("seek of closed file")
        self.flush()
        return self._reader.seek(pos, whence)

    def tell(self):
        # With pending writes the position is the raw position plus the
        # buffered bytes; otherwise the reader accounts for its read-ahead.
        if self._write_buf:
            return BufferedWriter.tell(self)
        return self._reader.tell()

    def truncate(self, pos=None):
        return BufferedWriter.truncate(self, self.tell() if pos is None else pos)

    def write(self, b):
        if self.closed:
            raise ValueError("write to closed file")
        reader = self._reader
        # Undo read-ahead before discarding it. The raw stream sits past the
        # logical position by the number of buffered-but-unread bytes (the
        # same quantity BufferedReader.seek/tell compensate for); without
        # rewinding, the data would be written after the read-ahead instead
        # of at the position the caller has read up to.
        unread = reader._buffer_num_bytes - reader._read_pos
        if unread > 0:
            self.raw.seek(-unread, 1)
        _buffered_reader_clear_buffer(reader)
        return BufferedWriter.write(self, b)
933
934
class BufferedReader(_BufferedIOMixin, bootstrap=True):
    """Buffered reader over a readable raw stream.

    peek(), read() and readline() are bound directly to the runtime's
    `_buffered_reader_*` functions.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        if not raw.readable():
            raise UnsupportedOperation("File or stream is not readable.")

        _BufferedIOMixin.__init__(self, raw)
        size = _index(buffer_size)
        if size <= 0:
            raise ValueError("invalid buffer size")
        _buffered_reader_init(self, size)

    def close(self):
        _buffered_reader_clear_buffer(self)
        return _BufferedIOMixin.close(self)

    def detach(self):
        _buffered_reader_clear_buffer(self)
        return _BufferedIOMixin.detach(self)

    peek = _buffered_reader_peek

    read = _buffered_reader_read

    readline = _buffered_reader_readline

    def read1(self, size=-1):
        # A negative size means "up to one buffer's worth".
        if size < 0:
            size = self._buffer_size
        if size == 0:
            return b""
        # Returns up to size bytes. If at least one byte is buffered, we only
        # return buffered bytes. Otherwise, we do one raw read.
        _buffered_reader_peek(self, 1)
        buffered = self._buffer_num_bytes - self._read_pos
        return _buffered_reader_read(self, min(size, buffered))

    def seek(self, pos, whence=0):
        _whence_guard(whence)
        if whence == 1:
            # Relative seeks are measured from the logical position, which
            # trails the raw position by the unread buffered bytes.
            unread = self._buffer_num_bytes - self._read_pos
            pos -= unread
        _buffered_reader_clear_buffer(self)
        return _BufferedIOMixin.seek(self, pos, whence)

    def tell(self):
        # Logical position: raw position minus the unread buffered bytes.
        raw_position = _BufferedIOMixin.tell(self)
        return raw_position - self._buffer_num_bytes + self._read_pos

    # TODO(emacs): Write an optimized _readinto
982
983
class BufferedWriter(_BufferedIOMixin, bootstrap=True):
    """Buffered writer over a writable raw stream.

    Data is collected in `_write_buf` and handed to the raw stream once the
    buffer grows past `buffer_size`, or on flush/seek/truncate.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        if not raw.writable():
            raise UnsupportedOperation("File or stream is not writable.")

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("buffer size must be strictly positive")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()  # TODO(T47880928): use a memoryview
        self._write_lock = _thread_Lock()

    def _flush_unlocked(self):
        # Push the whole write buffer to the raw stream; the caller is
        # expected to hold `_write_lock`.
        if self.closed:
            raise ValueError("flush of closed file")
        while self._write_buf:
            try:
                count = self.raw.write(self._write_buf)
            except BlockingIOError:
                raise RuntimeError(
                    "self.raw should implement RawIOBase: "
                    "it should not raise BlockingIOError"
                )
            if count is None:
                # The raw stream would block; nothing was written this time.
                raise BlockingIOError(
                    errno_EAGAIN, "write could not complete without blocking", 0
                )
            pending = _bytearray_len(self._write_buf)
            if count < 0 or count > pending:
                raise IOError(
                    f"raw write() returned invalid length {count} (should have "
                    f"been between 0 and {pending})"
                )
            del self._write_buf[:count]

    def flush(self):
        with self._write_lock:
            self._flush_unlocked()

    def seek(self, pos, whence=0):
        _whence_guard(whence)
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)

    def tell(self):
        # Raw position plus the bytes still waiting in the write buffer.
        raw_position = _BufferedIOMixin.tell(self)
        return raw_position + _bytearray_len(self._write_buf)

    def truncate(self, pos=None):
        with self._write_lock:
            self._flush_unlocked()
            target = self.raw.tell() if pos is None else pos
            return self.raw.truncate(target)

    def writable(self):
        return self.raw.writable()

    def write(self, b):
        if self.closed:
            raise ValueError("write to closed file")
        if _str_check(b):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            if _bytearray_len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer. (This may raise
                # BlockingIOError with characters_written == 0.)
                self._flush_unlocked()
            length_before = _bytearray_len(self._write_buf)
            self._write_buf.extend(b)
            accepted = _bytearray_len(self._write_buf) - length_before
            if _bytearray_len(self._write_buf) <= self.buffer_size:
                return accepted
            try:
                self._flush_unlocked()
            except BlockingIOError as err:
                excess = _bytearray_len(self._write_buf) - self.buffer_size
                if excess > 0:
                    # We've hit the buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    accepted -= excess
                    self._write_buf = self._write_buf[: self.buffer_size]
                    raise BlockingIOError(err.errno, err.strerror, accepted)
            return accepted
1066
1067
class BytesIO(_BufferedIOBase, bootstrap=True):
    """Buffered I/O implementation using an in-memory bytes buffer."""

    # Several methods below consist only of `_builtin()`; their behavior is
    # presumably provided natively by the runtime -- TODO confirm.

    def __init__(self, initial_bytes=None):
        _builtin()

    def __getstate__(self):
        # Not implemented yet; the intended implementation is kept below.
        _unimplemented("BytesIO.__getstate__")
        # if self.closed:
        #     raise ValueError("__getstate__ on closed file")
        # return (self.getvalue(), self._pos, self.__dict__.copy())

    def getvalue(self):
        """Return the bytes value (contents) of the buffer"""
        _builtin()

    def getbuffer(self):
        """Return a readable and writable view of the buffer."""
        _BytesIO_closed_guard(self)
        return memoryview(self._buffer)

    def close(self):
        """Mark the object closed and drop the reference to its buffer."""
        _BytesIO_guard(self)
        self._closed = True
        self._buffer = None

    def read(self, size=-1):
        _builtin()

    # read1() is the same function object as read().
    read1 = read

    def write(self, b):
        _builtin()

    def seek(self, pos, whence=0):
        # First try the intrinsic with the arguments as given; a `_Unbound`
        # result means it did not handle them, so retry with `pos` coerced
        # by _index() and `whence` by _obj_as_int().
        result = _BytesIO_seek(self, pos, whence)
        if result is not _Unbound:
            return result
        return _BytesIO_seek(self, _index(pos), _obj_as_int(whence))

    def tell(self):
        _BytesIO_closed_guard(self)
        return self._pos

    def truncate(self, pos=None):
        # None is passed through unchanged; anything else is coerced with
        # _index() before reaching the intrinsic.
        if pos is None:
            return _BytesIO_truncate(self, pos)
        return _BytesIO_truncate(self, _index(pos))

    def readable(self):
        _BytesIO_closed_guard(self)
        return True

    def writable(self):
        _BytesIO_closed_guard(self)
        return True

    def seekable(self):
        _BytesIO_closed_guard(self)
        return True

    @property
    def closed(self):
        return self._closed
1132
1133
class FileIO(_RawIOBase, bootstrap=True):
    """Raw binary stream backed by an OS-level file descriptor."""

    def __init__(self, file, mode="r", closefd=True, opener=None):  # noqa: C901
        """Wrap an existing descriptor or open `file` and wrap the result.

        Args:
            file: a non-negative int file descriptor, or a path that is
                handed to `_os_open` / `opener`.
            mode: exactly one of "r", "w", "a", "x", optionally with a single
                "+"; "b" is tolerated.
            closefd: whether close() closes the descriptor. Must be true when
                `file` is a path.
            opener: optional callable `opener(file, flags)` returning a
                non-negative int descriptor.

        Raises:
            TypeError: `file` is a float, `mode` is not a str, or `opener`
                returned a non-int.
            ValueError: negative descriptor, invalid mode, `closefd=False`
                with a path, or `opener` returned a negative value.
            IsADirectoryError: the descriptor refers to a directory.
        """
        if _float_check(file):
            raise TypeError("integer argument expected, got float")
        fd = -1
        if _int_check(file):
            if file < 0:
                raise ValueError("negative file descriptor")
            fd = file

        if not _str_check(mode):
            raise TypeError(f"invalid mode for FileIO: {mode!s}")
        mode_set = frozenset(mode)
        if not mode_set <= frozenset("xrwab+"):
            raise ValueError(f"invalid mode: {mode!s}")
        # Is mode non empty, with exactly one of r, w, a, or x, and maybe a +
        # i.e. it should match [rwax]\+?
        if sum(c in "rwax" for c in mode) != 1 or mode.count("+") > 1:
            raise ValueError(
                "Must have exactly one of create/read/write/append "
                "mode and at most one plus"
            )

        appending = False
        created = False
        closed = False
        readable = False
        seekable = None  # resolved lazily by seekable()
        writable = False

        if "x" in mode:
            created = True
            writable = True
        elif "r" in mode:
            readable = True
        elif "w" in mode:
            writable = True
        elif "a" in mode:
            writable = True
            appending = True

        if "+" in mode:
            readable = True
            writable = True

        flags = _os_parse_mode(mode)
        self.name = file

        # TODO(T86943617): call sys.audit

        # True only when the descriptor was opened here. A descriptor handed
        # in by the caller stays the caller's responsibility: it must not be
        # closed or reconfigured if setup fails below.
        owns_fd = False
        if fd < 0:
            # file was not an int, so we have to open it
            if not closefd:
                raise ValueError("Cannot use closefd=False with file name")
            if opener is None:
                fd = _os_open(file, flags, 0o666)
            else:
                fd = opener(file, flags)
                if not _int_check(fd):
                    raise TypeError("expected integer from opener")
                if fd < 0:
                    raise ValueError(f"opener returned {fd}")
            owns_fd = True

        try:
            if owns_fd and opener is not None:
                # Only descriptors produced by a custom opener are adjusted
                # here, and never one supplied directly by the caller.
                _os_set_noinheritable(fd)

            if _os_isdir(fd):
                raise IsADirectoryError(errno_EISDIR, "Is a directory")

            # TODO(T52792779): Don't translate newlines if _setmode is non-None
            # by setting O_BINARY

            if appending:
                # For consistent behavior, we explicitly seek to the end of
                # file (otherwise, it might be done only on the first write()).
                _os_lseek(fd, 0, 2)
        except Exception:
            if owns_fd:
                _os_close(fd)
            raise

        self._fd = fd
        self._closed = closed
        self._closefd = closefd
        self._appending = appending
        self._created = created
        self._readable = readable
        self._seekable = seekable
        self._writable = writable

    def __del__(self):
        """Warn about, then close, a stream that owns a still-open descriptor."""
        if not self.closed and self._closefd:
            _warn(f"unclosed file {self!r}", ResourceWarning, stacklevel=2, source=self)
            self.close()

    def __repr__(self):
        """Describe the stream; falls back to the descriptor if `name` is missing."""
        class_name = f"_io.{self.__class__.__qualname__}"
        if self.closed:
            return f"<{class_name} [closed]>"
        try:
            name = self.name
        except AttributeError:
            return (
                f"<{class_name} name={self._fd} "
                f"mode={self.mode!r} closefd={self._closefd!r}>"
            )
        else:
            return (
                f"<{class_name} name={name!r} "
                f"mode={self.mode!r} closefd={self._closefd!r}>"
            )

    def _checkReadable(self):
        """Raise UnsupportedOperation unless the stream was opened for reading."""
        if not self._readable:
            raise UnsupportedOperation("File not open for reading")

    def _checkWritable(self, msg=None):
        """Raise UnsupportedOperation unless the stream was opened for writing.

        `msg` is accepted but not used.
        """
        if not self._writable:
            raise UnsupportedOperation("File not open for writing")

    def read(self, size=None):
        """Read up to `size` bytes with a single `_os_read` call.

        A `size` of None or a negative value delegates to readall(). Returns
        None when the read raises BlockingIOError.
        """
        self._checkClosed()
        self._checkReadable()
        if size is None or size < 0:
            return FileIO.readall(self)
        try:
            return _os_read(self._fd, size)
        except BlockingIOError:
            return None

    def readall(self):
        # Placeholder body; presumably implemented natively by the runtime.
        _builtin()

    def readinto(self, byteslike):
        # Placeholder body; presumably implemented natively by the runtime.
        _builtin()

    def write(self, byteslike):
        """Write `byteslike` with `_os_write` and return its result.

        Objects that fail `_byteslike_check` are accepted only if their type
        has a `__buffer__` method that returns bytes. Returns None when the
        write raises BlockingIOError.

        Raises:
            TypeError: `byteslike` cannot be turned into bytes.
        """
        self._checkClosed()
        self._checkWritable()
        buf = byteslike
        if not _byteslike_check(byteslike):
            # One message shared by every rejection path below.
            error_message = (
                "a bytes-like object is required, not "
                f"'{_type(byteslike).__name__}'"
            )
            if not _object_type_hasattr(byteslike, "__buffer__"):
                raise TypeError(error_message)
            try:
                buf = byteslike.__buffer__()
            except Exception:
                raise TypeError(error_message)
            if not _bytes_check(buf):
                raise TypeError(error_message)
        try:
            return _os_write(self._fd, buf)
        except BlockingIOError:
            return None

    def seek(self, pos, whence=0):
        """Reposition the descriptor with `_os_lseek` and return its result.

        `whence` is validated first, then a float `pos` is rejected, and only
        then is the closed state checked.
        """
        _whence_guard(whence)
        if _float_check(pos):
            raise TypeError("an integer is required")
        self._checkClosed()
        return _os_lseek(self._fd, pos, whence)

    def tell(self):
        """Return the current position (an lseek of 0 relative to whence=1)."""
        self._checkClosed()
        return _os_lseek(self._fd, 0, 1)

    def truncate(self, size=None):
        """Resize the file to `size` (default: the current position); return it."""
        self._checkClosed()
        self._checkWritable()
        if size is None:
            size = self.tell()
        _os_ftruncate(self._fd, size)
        return size

    def close(self):
        """Close the stream, closing the descriptor too when `closefd` is set.

        The base-class close and the reset of `_fd` run even if closing the
        descriptor raises.
        """
        if not self.closed:
            try:
                if self._closefd:
                    _os_close(self._fd)
            finally:
                _RawIOBase.close(self)
                self._fd = -1

    def seekable(self):
        """Report whether the descriptor supports seeking.

        The answer is computed once by attempting tell() and then cached in
        `_seekable`; an OSError from tell() means "not seekable".
        """
        self._checkClosed()
        if self._seekable is None:
            try:
                FileIO.tell(self)
                self._seekable = True
            except OSError:
                self._seekable = False
        return self._seekable

    def readable(self):
        """Return whether the stream was opened for reading."""
        self._checkClosed()
        return self._readable

    def writable(self):
        """Return whether the stream was opened for writing."""
        self._checkClosed()
        return self._writable

    def fileno(self):
        """Return the underlying file descriptor."""
        self._checkClosed()
        return self._fd

    def isatty(self):
        """Return the result of `_os_isatty` for the descriptor."""
        self._checkClosed()
        return _os_isatty(self._fd)

    @property
    def closefd(self):
        """The `closefd` value given to the constructor."""
        return self._closefd

    @property
    def mode(self):
        """Mode string rebuilt from the flags recorded by the constructor.

        One of "xb", "xb+", "ab", "ab+", "rb", "rb+" or "wb".
        """
        if self._created:
            if self._readable:
                return "xb+"
            else:
                return "xb"
        elif self._appending:
            if self._readable:
                return "ab+"
            else:
                return "ab"
        elif self._readable:
            if self._writable:
                return "rb+"
            else:
                return "rb"
        else:
            return "wb"
1374
1375
class TextIOWrapper(_TextIOBase, bootstrap=True):
    """Text stream that encodes to and decodes from a binary `buffer` object."""

    # Number of bytes requested from the buffer by each _read_chunk() call.
    _CHUNK_SIZE = 2048

    # The write_through argument has no effect here since this
    # implementation always writes through. The argument is present only
    # so that the signature can match the signature of the C version.
    def __init__(  # noqa: C901
        self,
        buffer,
        encoding=None,
        errors=None,
        newline=None,
        line_buffering=False,
        write_through=False,
    ):
        """Validate the arguments and set up the codec and newline state.

        Args:
            buffer: the binary stream to wrap; its readable(), writable() and
                seekable() methods are consulted here.
            encoding: codec name; None selects "UTF-8".
            errors: codec error handler name; None selects "strict".
            newline: None, "", "\\n", "\\r" or "\\r\\n".
            line_buffering: int-like flag; when true, write() flushes after
                text containing a line break.
            write_through: validated but otherwise unused.

        Raises:
            TypeError: an argument has the wrong type.
            ValueError: `newline` is not one of the legal values.
        """
        # Argument parsing happens first in CPython's _io module
        if encoding is None:
            encoding = "UTF-8"

        elif not _str_check(encoding):
            raise TypeError(
                "TextIOWrapper() argument 2 must be str or None, not "
                f"{_type(encoding).__name__}"
            )
        if errors is None:
            errors = "strict"
        elif not _str_check(errors):
            raise TypeError(
                "TextIOWrapper() argument 'errors' must be str or None, not "
                f"{_type(errors).__name__}"
            )
        if newline is not None and not _str_check(newline):
            raise TypeError(
                "TextIOWrapper() argument 4 must be str or None, not "
                f"{_type(newline).__name__}"
            )
        if line_buffering is None:
            line_buffering = False
        elif not _int_check(line_buffering):
            raise TypeError(
                f"an integer is required (got type {_type(line_buffering).__name__})"
            )
        if write_through is None:
            write_through = False
        elif not _int_check(write_through):
            raise TypeError(
                f"an integer is required (got type {_type(write_through).__name__})"
            )

        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError(f"illegal newline value: {newline}")

        self._buffer = buffer
        self._line_buffering = bool(line_buffering)
        self._encoding = encoding
        self._errors = errors
        # The newline flags must be in place before _get_decoder() runs,
        # because it reads _readuniversal and _readtranslate.
        self._readuniversal = not newline
        self._readtranslate = newline is None
        self._readnl = newline
        self._writetranslate = newline != ""
        self._writenl = newline or _os_linesep
        self._decoder = self._get_decoder() if buffer.readable() else None
        self._encoder = self._get_encoder() if buffer.writable() else None
        self._decoded_chars = ""  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = buffer.seekable()
        self._has_read1 = hasattr(buffer, "read1")
        self._b2cratio = 0.0

        if self._seekable and self._encoder:
            position = self.buffer.tell()
            if position != 0:
                self._encoder.setstate(0)

    def __next__(self):
        """Return the next line, raising StopIteration at end of input.

        tell() is disabled (`_telling` is False) while iteration is in
        progress and restored when the input is exhausted.
        """
        _TextIOWrapper_attached_guard(self)
        self._telling = False
        line = self.readline()
        if not _str_check(line):
            raise IOError(
                "readline() should have returned a str object, not "
                f"'{_type(line).__name__}'"
            )
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    # self._snapshot is either None, or a tuple (dec_flags, next_input)
    # where dec_flags is the second (integer) item of the decoder state
    # and next_input is the chunk of input bytes that comes next after the
    # snapshot point.  We use this to reconstruct decoder states in tell().

    # Naming convention:
    #   - "bytes_..." for integer variables that count input bytes
    #   - "chars_..." for integer variables that count decoded characters

    def __repr__(self):
        """Describe the wrapper; name and mode are omitted if they cannot be read."""
        try:
            name_component = f" name={self.name!r}"
        except Exception:
            name_component = ""
        try:
            mode_component = "" if self.mode is None else f" mode={self.mode!r}"
        except Exception:
            mode_component = ""
        return (
            f"<_io.TextIOWrapper{name_component}"
            f"{mode_component} encoding={self._encoding!r}>"
        )

    # _get_decoded_chars(), _rewind_decoded_chars() and _set_decoded_chars()
    # implement an ADT for _decoded_chars. Text returned from the decoder is
    # buffered there until the client requests it by calling read() or
    # readline().
    def _get_decoded_chars(self, n=None):
        """Consume and return up to `n` buffered characters (all if n is None)."""
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset : offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _get_decoder(self):
        """Build an incremental decoder for the configured encoding and errors.

        In universal-newline mode it is wrapped in IncrementalNewlineDecoder.
        """
        make_decoder = _codecs_getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            return IncrementalNewlineDecoder(decoder, self._readtranslate)
        return decoder

    def _get_encoder(self):
        """Build an incremental encoder for the configured encoding and errors."""
        make_encoder = _codecs_getincrementalencoder(self._encoding)
        return make_encoder(self._errors)

    def _pack_cookie(
        self, position, dec_flags=0, bytes_to_feed=0, need_eof=0, chars_to_skip=0
    ):
        """Pack the tell() state into one integer, 64 bits per field."""
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result.  For most simple
        # decoders, tell() will often just give a byte offset in the file.
        return (
            position
            | (dec_flags << 64)
            | (bytes_to_feed << 128)
            | (chars_to_skip << 192)
            | bool(need_eof) << 256
        )

    def _read_chunk(self):
        """Read and decode one chunk; return False once EOF has been reached.

        Raises:
            UnsupportedOperation: the wrapper has no decoder.
            TypeError: the decoder state's first item is not bytes.
        """
        # The return value is True unless EOF was reached.  The decoded
        # string is placed in self._decoded_chars (replacing its previous
        # value).  The entire input chunk is sent to the decoder, though
        # some of it may remain buffered in the decoder, yet to be
        # converted.

        if self._decoder is None:
            raise UnsupportedOperation("not readable")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
            if not _bytes_check(dec_buffer):
                raise TypeError(
                    "illegal decoder state: the first item should be a bytes "
                    f"object, not '{_type(dec_buffer).__name__}'"
                )

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        if self._has_read1:
            input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        else:
            input_chunk = self.buffer.read(self._CHUNK_SIZE)
        eof = not input_chunk
        decoded_chars = self._decoder.decode(input_chunk, eof)
        self._set_decoded_chars(decoded_chars)
        if decoded_chars:
            # Bytes-per-character ratio, used by tell() as a first guess.
            self._b2cratio = len(input_chunk) / len(self._decoded_chars)
        else:
            self._b2cratio = 0.0

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _rewind_decoded_chars(self, n):
        """Un-consume the last `n` characters handed out from the buffer."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _set_decoded_chars(self, chars):
        """Replace the buffered decoded text and reset the consumed offset."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _unpack_cookie(self, bigint):
        """Split a cookie into its fields (inverse of _pack_cookie).

        Returns (position, dec_flags, bytes_to_feed, need_eof, chars_to_skip).
        """
        rest, position = divmod(bigint, 1 << 64)
        rest, dec_flags = divmod(rest, 1 << 64)
        rest, bytes_to_feed = divmod(rest, 1 << 64)
        need_eof, chars_to_skip = divmod(rest, 1 << 64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip

    @property
    def buffer(self):
        """The wrapped binary stream (None after detach())."""
        return self._buffer

    def close(self):
        """Flush and close the wrapped buffer; a no-op when already closed.

        The buffer is closed even if flush() raises.
        """
        _TextIOWrapper_attached_guard(self)
        if not self.closed:
            try:
                self.flush()
            finally:
                self._buffer.close()

    @property
    def closed(self):
        """The `closed` attribute of the wrapped buffer."""
        _TextIOWrapper_attached_guard(self)
        return self._buffer.closed

    def detach(self):
        """Flush, disconnect the wrapped buffer from this wrapper, and return it."""
        _TextIOWrapper_attached_guard(self)
        self.flush()
        buffer = self._buffer
        self._buffer = None
        return buffer

    @property
    def encoding(self):
        """The encoding name chosen in the constructor."""
        return self._encoding

    @property
    def errors(self):
        """The error handler name chosen in the constructor."""
        return self._errors

    def fileno(self):
        """Return the wrapped buffer's fileno()."""
        _TextIOWrapper_attached_guard(self)
        return self._buffer.fileno()

    def flush(self):
        """Flush the wrapped buffer and re-enable tell() if the stream is seekable."""
        result = _TextIOWrapper_attached_closed_guard(self)
        if result is _Unbound:
            self._checkClosed()
        self.buffer.flush()
        self._telling = self._seekable

    def isatty(self):
        """Return the wrapped buffer's isatty()."""
        _TextIOWrapper_attached_guard(self)
        return self.buffer.isatty()

    @property
    def line_buffering(self):
        """Whether line buffering was requested in the constructor."""
        return self._line_buffering

    @property
    def name(self):
        """The `name` attribute of the wrapped buffer."""
        _TextIOWrapper_attached_guard(self)
        return self._buffer.name

    @property
    def newlines(self):
        """The decoder's `newlines` attribute, or None when unavailable."""
        _TextIOWrapper_attached_guard(self)
        if self._decoder is None:
            return None
        try:
            return self._decoder.newlines
        except AttributeError:
            return None

    def read(self, size=None):
        """Read and return up to `size` characters (everything if None or < 0).

        Raises:
            TypeError: `size` is neither None nor an int.
            UnsupportedOperation: the stream has no decoder, i.e. the wrapped
                buffer was not readable when the wrapper was created.
        """
        if size is None:
            size = -1
        elif not _int_check(size):
            raise TypeError(f"integer argument expected, got '{_type(size).__name__}'")

        result = _TextIOWrapper_attached_closed_guard(self)
        if result is _Unbound:
            self._checkClosed()
            self._checkReadable("not readable")

        decoder = self._decoder
        if decoder is None:
            # Fail with the documented exception type rather than an
            # AttributeError from calling a method on None further down.
            raise UnsupportedOperation("not readable")
        if size < 0:
            # Read everything.
            result = self._get_decoded_chars() + decoder.decode(
                self._buffer.read(), final=True
            )
            self._set_decoded_chars("")
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have size characters to return.
            eof = False
            result = self._get_decoded_chars(size)
            while len(result) < size and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(size - len(result))
            return result

    def readable(self):
        """Return the wrapped buffer's readable()."""
        _TextIOWrapper_attached_guard(self)
        return self._buffer.readable()

    def readline(self, size=None):  # noqa: C901
        """Read and return one line, at most `size` characters when size >= 0.

        Which sequences end a line depends on the newline mode chosen in the
        constructor. Returns whatever text remains when the input ends before
        a line ending is found.
        """
        result = _TextIOWrapper_attached_closed_guard(self)
        if result is _Unbound:
            self._checkClosed()

        if size is None:
            size = -1
        elif not _int_check(size):
            size = _index(size)

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find("\n", start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if size >= 0 and len(line) >= size:
                endpos = size  # reached length size
                break

            # No line ending seen yet - get more data
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars("")
                self._snapshot = None
                return line

        if size >= 0 and endpos > size:
            endpos = size  # don't exceed size

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    def _reset_encoder(self, position):
        """Reset the encoder for writing at `position`; no-op without an encoder.

        At position 0 the encoder is fully reset; anywhere else its state is
        set to 0.
        """
        if self._encoder:
            if position != 0:
                self._encoder.setstate(0)
            else:
                self._encoder.reset()

    def seek(self, cookie, whence=0):  # noqa: C901
        """Move to the position described by `cookie` and return the new position.

        Args:
            cookie: a value previously returned by tell() when whence is 0;
                must be 0 when whence is 1 or 2.
            whence: 0 (absolute), 1 (current position) or 2 (end of stream).

        Raises:
            TypeError: `whence` is not an int.
            UnsupportedOperation: non-zero `cookie` with whence 1 or 2.
            ValueError: invalid `whence` or negative `cookie`.
            OSError: the logical position cannot be restored.
        """
        if not _int_check(whence):
            raise TypeError(
                f"an integer is required (got type {_type(whence).__name__})"
            )
        result = _TextIOWrapper_attached_closed_seekable_guard(self)
        if result is _Unbound:
            self._checkClosed()
            self._checkSeekable("underlying stream is not seekable")

        if whence == 1:  # seek relative to current position
            if cookie != 0:
                raise UnsupportedOperation("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        elif whence == 2:  # seek relative to end of file
            if cookie != 0:
                raise UnsupportedOperation("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars("")
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            self._reset_encoder(position)
            return position
        elif whence != 0:
            raise ValueError(f"invalid whence ({whence}, should be 0, 1 or 2)")
        if cookie < 0:
            raise ValueError(f"negative seek position {cookie!r}")
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        unpacked = self._unpack_cookie(cookie)
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = unpacked
        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars("")
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if cookie == 0 and self._decoder:
            self._decoder.reset()
        elif self._decoder or dec_flags or chars_to_skip:
            # A stream created without a decoder still needs one to replay
            # a cookie that carries decoder flags or characters to skip.
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b"", dec_flags))
            self._snapshot = (dec_flags, b"")

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise OSError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        self._reset_encoder(cookie)
        return cookie

    def seekable(self):
        """Return the wrapped buffer's seekable()."""
        _TextIOWrapper_attached_guard(self)
        return self._buffer.seekable()

    def tell(self):  # noqa: C901
        """Return a cookie describing the current position, usable with seek().

        Raises:
            OSError: telling is disabled by an in-progress iteration, or the
                logical position cannot be reconstructed.
        """
        result = _TextIOWrapper_attached_closed_seekable_guard(self)
        if result is _Unbound:
            self._checkClosed()
            self._checkSeekable("underlying stream is not seekable")
        if not self._telling:
            raise OSError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Fast search for an acceptable start point, close to our
            # current pos.
            # Rationale: calling decoder.decode() has a large overhead
            # regardless of chunk size; we want the number of such calls to
            # be O(1) in most situations (common decoders, non-crazy input).
            # Actually, it will be exactly 1 for fixed-size codecs (all
            # 8-bit codecs, also UTF-16 and UTF-32).
            skip_bytes = int(self._b2cratio * chars_to_skip)
            skip_back = 1
            assert skip_bytes <= len(next_input)
            while skip_bytes > 0:
                decoder.setstate((b"", dec_flags))
                # Decode up to tentative start point
                n = len(decoder.decode(next_input[:skip_bytes]))
                if n <= chars_to_skip:
                    b, d = decoder.getstate()
                    if not b:
                        # Before pos and no bytes buffered in decoder => OK
                        dec_flags = d
                        chars_to_skip -= n
                        break
                    # Skip back by buffered amount and reset heuristic
                    skip_bytes -= len(b)
                    skip_back = 1
                else:
                    # We're too far ahead, skip back a bit
                    skip_bytes -= skip_back
                    skip_back = skip_back * 2
            else:
                skip_bytes = 0
                decoder.setstate((b"", dec_flags))

            # Note our initial start point.
            start_pos = position + skip_bytes
            start_flags = dec_flags
            if chars_to_skip == 0:
                # We haven't moved from the start point.
                return self._pack_cookie(start_pos, start_flags)

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            bytes_fed = 0
            need_eof = 0
            # Chars decoded since `start_pos`
            chars_decoded = 0
            for i in range(skip_bytes, len(next_input)):
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_input[i : i + 1]))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b"", final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise OSError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip
            )
        finally:
            # The probing above must not disturb the live decoder.
            decoder.setstate(saved_state)

    def truncate(self, pos=None):
        """Flush, then truncate the wrapped buffer at `pos` and return its result."""
        _TextIOWrapper_attached_guard(self)
        self.flush()
        return self.buffer.truncate(pos)

    def writable(self):
        """Return the wrapped buffer's writable()."""
        _TextIOWrapper_attached_guard(self)
        return self.buffer.writable()

    def write(self, text):
        """Encode `text`, write it to the buffer, and return its length.

        `_TextIOWrapper_write_UTF8` is tried first; the generic path below is
        used only when it returns `_Unbound`.

        Raises:
            TypeError: `text` is not a str.
            UnsupportedOperation: the stream has no encoder, i.e. the wrapped
                buffer was not writable when the wrapper was created.
        """
        result = _TextIOWrapper_write_UTF8(self, text)
        if result is not _Unbound:
            return result

        if not _str_check(text):
            raise TypeError(f"write() argument must be str, not {_type(text).__name__}")
        result = _TextIOWrapper_attached_closed_guard(self)
        if result is _Unbound:
            self._checkClosed()
        encoder = self._encoder
        if encoder is None:
            # Fail with the documented exception type rather than an
            # AttributeError from calling encode() on None.
            raise UnsupportedOperation("not writable")
        length = _str_len(text)
        haslf = (self._writetranslate or self._line_buffering) and "\n" in text
        if haslf and self._writetranslate and self._writenl != "\n":
            text = text.replace("\n", self._writenl)
        b = encoder.encode(text)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in text):
            self.flush()
        # Any read-ahead text is stale once something has been written.
        self._set_decoded_chars("")
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length
1986
1987
class StringIO(_TextIOBase, bootstrap=True):
    """In-memory text stream.

    Methods whose body is only `_builtin()` are placeholders; presumably the
    runtime supplies their implementation natively.
    """

    def __init__(self, initial_value="", newline="\n"):
        _builtin()

    def __next__(self):
        _builtin()

    def __repr__(self):
        """Return a representation that embeds the object's address in hex."""
        return f"<_io.StringIO object at {_address(self):#x}>"

    @property
    def encoding(self):
        """Always None."""
        return None

    @property
    def errors(self):
        """Always None."""
        return None

    def getvalue(self):
        _builtin()

    def readable(self):
        """Return True once `_StringIO_closed_guard` has accepted the stream."""
        _StringIO_closed_guard(self)
        return True

    def seekable(self):
        """Return True once `_StringIO_closed_guard` has accepted the stream."""
        _StringIO_closed_guard(self)
        return True

    def writable(self):
        """Return True once `_StringIO_closed_guard` has accepted the stream."""
        _StringIO_closed_guard(self)
        return True

    def close(self):
        _builtin()

    @property
    def closed(self):
        """The value of the `_closed` flag."""
        return self._closed

    @property
    def line_buffering(self):
        """Return False once `_StringIO_closed_guard` has accepted the stream."""
        _StringIO_closed_guard(self)
        return False

    @property
    def newlines(self):
        """Line endings seen so far, looked up from `_seennl`.

        Returns None when `_readtranslate` is None; otherwise `_seennl`
        indexes a fixed table of None, a single line ending, or a tuple of
        line endings.
        """
        if self._readtranslate is None:
            return None
        seen_table = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return seen_table[self._seennl]

    def read(self, size=None):
        _builtin()

    def readline(self, size=None):
        _builtin()

    def seek(self, offset, whence=0):
        """Move the stream position and return the result of `_StringIO_seek`.

        The arguments are first passed through unchanged; if that attempt
        answers `_Unbound`, it is repeated with `offset` converted by
        `_index` and `whence` converted by `_obj_as_int`.
        """
        fast_result = _StringIO_seek(self, offset, whence)
        if fast_result is _Unbound:
            return _StringIO_seek(self, _index(offset), _obj_as_int(whence))
        return fast_result

    def tell(self):  # noqa: C901
        """Return the current stream position stored in `self._pos`."""
        _StringIO_closed_guard(self)
        return self._pos

    def truncate(self, size=None):
        _builtin()

    def write(self, value):
        _builtin()
2069
2070
def _fspath(obj):
    """Return the str or bytes path for `obj`.

    str and bytes objects are returned unchanged. Anything else must provide
    `__fspath__` on its type, and that method must return str or bytes.

    Raises:
        TypeError: `obj` has no `__fspath__`, or it returned something other
            than str or bytes.
    """
    if _str_check(obj):
        return obj
    if _bytes_check(obj):
        return obj
    fspath_method = _object_type_getattr(obj, "__fspath__")
    if fspath_method is _Unbound:
        raise TypeError("expected str, bytes, or os.PathLike object")
    path = fspath_method()
    if not (_str_check(path) or _bytes_check(path)):
        raise TypeError("expected __fspath__ to return str or bytes")
    return path
2081
2082
def open(  # noqa: C901
    file,
    mode="r",
    buffering=-1,
    encoding=None,
    errors=None,
    newline=None,
    closefd=True,
    opener=None,
):
    """Open `file` and return a stream object matching `mode` and `buffering`.

    The result is a FileIO (binary mode with buffering=0), one of the
    Buffered* classes (binary mode), or a TextIOWrapper (text mode). Any
    layer already created is closed if a later step raises an Exception.

    Raises:
        TypeError: an argument has the wrong type.
        ValueError: the mode string or argument combination is invalid.
    """
    if not _int_check(file):
        file = _fspath(file)
    if not (_str_check(file) or _bytes_check(file) or _int_check(file)):
        # TODO(emacs): Is this check necessary? os.fspath guarantees str/bytes,
        # above check guarantees int or str or bytes
        raise TypeError("invalid file: %r" % file)
    if not _str_check(mode):
        raise TypeError(f"open() argument 2 must be str, not {_type(mode).__name__}")
    if not _int_check(buffering):
        raise TypeError(
            f"an integer is required (got type {_type(buffering).__name__})"
        )
    if encoding is not None and not _str_check(encoding):
        raise TypeError(
            f"open() argument 4 must be str or None, not {_type(encoding).__name__}"
        )
    if errors is not None and not _str_check(errors):
        raise TypeError(
            f"open() argument 5 must be str or None, not {_type(errors).__name__}"
        )

    # Reject unknown or repeated mode characters.
    modes = set(mode)
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending or updating:
            raise ValueError("mode U cannot be combined with 'x', 'w', 'a', or '+'")
        _warn("'U' mode is deprecated", DeprecationWarning, 2)
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    primary_modes = (creating, reading, writing, appending)
    if sum(primary_modes) > 1:
        raise ValueError("must have exactly one of create/read/write/append mode")
    if not any(primary_modes):
        raise ValueError(
            "Must have exactly one of create/read/write/append mode and at "
            "most one plus"
        )
    if binary:
        if encoding is not None:
            raise ValueError("binary mode doesn't take an encoding argument")
        if errors is not None:
            raise ValueError("binary mode doesn't take an errors argument")
        if newline is not None:
            raise ValueError("binary mode doesn't take a newline argument")
        if buffering == 1:
            _warn(
                "line buffering (buffering=1) isn't supported in "
                "binary mode, the default buffer size will be used",
                RuntimeWarning,
                2,
            )

    # Build the raw-layer mode string in the fixed order x, r, w, a, +.
    raw_mode = "".join(
        flag
        for flag, enabled in (
            ("x", creating),
            ("r", reading),
            ("w", writing),
            ("a", appending),
            ("+", updating),
        )
        if enabled
    )
    raw = FileIO(file, raw_mode, closefd, opener=opener)
    # `result` always names the outermost layer built so far, so that the
    # error handler below closes the whole stack.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or (buffering < 0 and raw.isatty()):
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffered_class = BufferedRandom
        elif creating or writing or appending:
            buffered_class = BufferedWriter
        elif reading:
            buffered_class = BufferedReader
        else:
            raise ValueError("unknown mode: %r" % mode)
        buffer = buffered_class(raw, buffering)
        result = buffer
        if binary:
            return result
        wrapper = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
        result = wrapper
        wrapper.mode = mode
        return result
    except Exception:
        result.close()
        raise
2190
2191
def open_code(path):
    """Open `path` for binary reading via open(path, "rb").

    `_str_guard` is applied to `path` first; presumably it rejects non-str
    arguments.
    """
    _str_guard(path)
    return open(path, "rb")