library/_str_mod.py at trunk · bernsteinbear.com/skybison

bernsteinbear.com / skybison
fork atom
this repo has no description
fork atom
skybison / library / _str_mod.py
at trunk 483 lines 18 kB view raw
wrap content
Max Bernstein Add license headers 4y ago
29d072a3
  1#!/usr/bin/env python3
  2# Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
  3"""This is an internal module implementing __mod__ formatting for str and bytes"""
  4
  5from builtins import (
  6    _float,
  7    _index,
  8    _mapping_check,
  9    _number_check,
 10    _str_array,
 11)
 12
 13from _builtins import (
 14    _bytes_check,
 15    _float_check,
 16    _float_format,
 17    _float_signbit,
 18    _int_check,
 19    _str_array_iadd,
 20    _str_check,
 21    _str_len,
 22    _tuple_check,
 23    _tuple_getitem,
 24    _tuple_len,
 25    _type,
 26)
 27
 28
 29_FLAG_LJUST = 1 << 0
 30_FLAG_ZERO = 1 << 1
 31
 32
 33def _format_string(result, flags, width, precision, fragment):
 34    if precision >= 0:
 35        fragment = fragment[:precision]
 36    if width <= 0:
 37        _str_array_iadd(result, fragment)
 38        return
 39
 40    padding_len = -1
 41    padding_len = width - _str_len(fragment)
 42    if padding_len > 0 and not (flags & _FLAG_LJUST):
 43        _str_array_iadd(result, " " * padding_len)
 44        padding_len = 0
 45    _str_array_iadd(result, fragment)
 46    if padding_len > 0:
 47        _str_array_iadd(result, " " * padding_len)
 48
 49
 50def _format_number(result, flags, width, precision, sign, prefix, fragment):
 51    if width <= 0 and precision < 0:
 52        _str_array_iadd(result, sign)
 53        _str_array_iadd(result, prefix)
 54        _str_array_iadd(result, fragment)
 55        return
 56
 57    # Compute a couple values before assembling the result:
 58    # - `padding_len` the number of spaces around the number
 59    #    - _FLAG_LJUST determines whether it is before/after
 60    #    - We compute it by starting with the full width and subtracting the
 61    #      length of everything else we are going to emit.
 62    # - `num_leading_zeros` number of extra zeros to print between prefix and
 63    #    the number.
 64    fragment_len = _str_len(fragment)
 65    padding_len = width - fragment_len - _str_len(sign) - _str_len(prefix)
 66
 67    num_leading_zeros = 0
 68    if precision >= 0:
 69        num_leading_zeros = precision - fragment_len
 70        if num_leading_zeros > 0:
 71            padding_len -= num_leading_zeros
 72
 73    if (flags & _FLAG_ZERO) and not (flags & _FLAG_LJUST):
 74        # Perform padding by increasing precision instead.
 75        if padding_len > 0:
 76            num_leading_zeros += padding_len
 77        padding_len = 0
 78
 79    # Compose the result.
 80    if padding_len > 0 and not (flags & _FLAG_LJUST):
 81        _str_array_iadd(result, " " * padding_len)
 82        padding_len = 0
 83    _str_array_iadd(result, sign)
 84    _str_array_iadd(result, prefix)
 85    if num_leading_zeros > 0:
 86        _str_array_iadd(result, "0" * num_leading_zeros)
 87    _str_array_iadd(result, fragment)
 88    if padding_len > 0:
 89        _str_array_iadd(result, " " * padding_len)
 90
 91
# Module-level alias for int.__format__; used below to render the digits of
# the %x, %X and %o conversions.
_int_format = int.__format__
 93
 94
class Formatter:
    """Shared engine for printf-style (`%`) formatting.

    `format` parses the format string and assembles the result as str.
    Subclasses supply the conversions between str and their own type
    (`cast`, `as_str`, `as_repr`) and build the exceptions whose type or
    message differs between the string and bytes flavours.
    """

    # Word inserted into the "not all arguments converted" message; subclasses
    # override it.
    CATEGORY = None

    @staticmethod
    def cast(x):
        """Convert `x` to the subclass's native type.

        `format` applies it to mapping keys parsed from the format string and
        to the final result.
        """
        raise NotImplementedError("virtual")

    # `as_str` converts the format string and %s arguments to str; `as_repr`
    # produces the text for %r. Both default to the (virtual) `cast`.
    as_str = as_repr = cast

    def percent_c_not_in_range(self):
        """Return the exception for %c when `chr()` raises ValueError."""
        raise NotImplementedError("virtual")

    def percent_c_overflow(self):
        """Return the exception for %c when `chr()` raises OverflowError."""
        raise NotImplementedError("virtual")

    def percent_c_requires_int_or_char(self):
        """Return the exception for %c given an unusable argument.

        Used for a str whose length is not 1, a value `_index` rejects, or
        any other failure of `chr()`.
        """
        raise NotImplementedError("virtual")

    def percent_d_a_number_is_required(self, c, tname):
        """Return the exception for %d/%i/%u given a non-numeric argument.

        `c` is the conversion character and `tname` the argument's type name.
        """
        raise NotImplementedError("virtual")

    def must_be_real_number(self, float_exception, tname):
        """Return the exception for a float conversion that did not yield a float.

        `float_exception` is whatever `format` ended up with instead of a
        float: the TypeError caught from `_float`, or the non-float value it
        returned. `tname` is the argument's type name.
        """
        raise NotImplementedError("virtual")

    def not_all_arguments_converted(self):
        """Return the TypeError raised when positional arguments are left over."""
        return TypeError(
            f"not all arguments converted during {self.CATEGORY} formatting"
        )

    def format(self, string: [str, bytes], args) -> [str, bytes]:  # noqa: C901
        """Apply `%` formatting of `string` with `args` and return the result.

        `args` is either a tuple of positional values, a single value, or a
        mapping used by `%(name)` references. The format string is converted
        with `as_str`, processed as str, and the assembled text is converted
        back with `cast`.

        Raises:
            TypeError: too few or too many arguments, a `%(name)` reference
                without a mapping, a non-int `*` width/precision, or an
                argument of the wrong type for its conversion.
            ValueError: the string ends inside a specifier ("incomplete
                format") or uses an unsupported conversion character.
        """
        string = self.as_str(string)

        # Positional arguments are always consumed from a tuple; a single
        # non-tuple argument is wrapped in a 1-tuple.
        args_dict = None
        if _tuple_check(args):
            args_tuple = args
            args_len = _tuple_len(args_tuple)
        else:
            args_tuple = (args,)
            args_len = 1
        arg_idx = 0

        result = _str_array()
        # `idx` is the index of the character most recently read from `it`;
        # `begin` is the start of the literal text not yet copied to `result`.
        idx = -1
        begin = 0
        in_specifier = False
        it = str.__iter__(string)
        # NOTE(review): characters are compared to one-character literals with
        # `is` (lint F632 suppressed); presumably this relies on the runtime
        # making such strings identity-comparable — confirm before changing.
        try:
            while True:
                c = it.__next__()
                idx += 1
                if c is not "%":  # noqa: F632
                    continue

                # Flush the literal text preceding this specifier.
                _str_array_iadd(result, string[begin:idx])

                in_specifier = True
                c = it.__next__()
                idx += 1

                # Escaped % symbol
                if c is "%":  # noqa: F632
                    _str_array_iadd(result, "%")
                    begin = idx + 1
                    in_specifier = False
                    continue

                # Parse named reference.
                if c is "(":  # noqa: F632
                    # Lazily initialize args_dict.
                    if args_dict is None:
                        if (
                            _tuple_check(args)
                            or _str_check(args)
                            or not _mapping_check(args)
                        ):
                            raise TypeError("format requires a mapping")
                        args_dict = args

                    # Scan to the matching ")" while tracking nested
                    # parentheses inside the key.
                    pcount = 1
                    keystart = idx + 1
                    while pcount > 0:
                        c = it.__next__()
                        idx += 1
                        if c is ")":  # noqa: F632
                            pcount -= 1
                        elif c is "(":  # noqa: F632
                            pcount += 1
                    key = string[keystart:idx]

                    # skip over closing ")"
                    c = it.__next__()
                    idx += 1

                    # lookup parameter in dictionary.
                    value = args_dict[self.cast(key)]
                    # The looked-up value becomes the only positional argument
                    # available to the rest of this specifier.
                    args_tuple = (value,)
                    args_len = 1
                    arg_idx = 0

                # Parse flags.
                flags = 0
                positive_sign = ""
                use_alt_formatting = False
                while True:
                    if c is "-":  # noqa: F632
                        flags |= _FLAG_LJUST
                    elif c is "+":  # noqa: F632
                        positive_sign = "+"
                    elif c is " ":  # noqa: F632
                        # "+" takes priority over " " regardless of order.
                        if positive_sign is not "+":  # noqa: F632
                            positive_sign = " "
                    elif c is "#":  # noqa: F632
                        use_alt_formatting = True
                    elif c is "0":  # noqa: F632
                        flags |= _FLAG_ZERO
                    else:
                        break
                    c = it.__next__()
                    idx += 1

                # Parse width.
                width = -1
                if c is "*":  # noqa: F632
                    # Width is taken from the next positional argument.
                    if arg_idx >= args_len:
                        raise TypeError("not enough arguments for format string")
                    arg = _tuple_getitem(args_tuple, arg_idx)
                    arg_idx += 1
                    if not _int_check(arg):
                        raise TypeError("* wants int")
                    width = arg
                    if width < 0:
                        # A negative width means left-justify with |width|.
                        flags |= _FLAG_LJUST
                        width = -width
                    c = it.__next__()
                    idx += 1
                elif "0" <= c <= "9":
                    # Accumulate decimal digits; the multiply happens only
                    # when another digit follows.
                    width = 0
                    while True:
                        width += ord(c) - ord("0")
                        c = it.__next__()
                        idx += 1
                        if not ("0" <= c <= "9"):
                            break
                        width *= 10

                # Parse precision.
                precision = -1
                if c is ".":  # noqa: F632
                    # A bare "." means precision 0.
                    precision = 0
                    c = it.__next__()
                    idx += 1
                    if c is "*":  # noqa: F632
                        if arg_idx >= args_len:
                            raise TypeError("not enough arguments for format string")
                        arg = _tuple_getitem(args_tuple, arg_idx)
                        arg_idx += 1
                        if not _int_check(arg):
                            raise TypeError("* wants int")
                        # Negative precision arguments are clamped to 0.
                        precision = max(0, arg)
                        c = it.__next__()
                        idx += 1
                    elif "0" <= c <= "9":
                        while True:
                            precision += ord(c) - ord("0")
                            c = it.__next__()
                            idx += 1
                            if not ("0" <= c <= "9"):
                                break
                            precision *= 10

                # Parse and process format.
                # Every conversion below consumes exactly one argument, which
                # is fetched before the conversion character is validated.
                if arg_idx >= args_len:
                    raise TypeError("not enough arguments for format string")
                arg = _tuple_getitem(args_tuple, arg_idx)
                arg_idx += 1

                if c is "s":  # noqa: F632
                    fragment = self.as_str(arg)
                    _format_string(result, flags, width, precision, fragment)
                elif c is "r":  # noqa: F632
                    fragment = self.as_repr(arg)
                    _format_string(result, flags, width, precision, fragment)
                elif c is "a":  # noqa: F632
                    fragment = ascii(arg)
                    _format_string(result, flags, width, precision, fragment)
                elif c is "c":  # noqa: F632
                    # Accepts a one-character str or an integer code point.
                    if _str_check(arg):
                        if _str_len(arg) != 1:
                            raise self.percent_c_requires_int_or_char()
                        fragment = arg
                    else:
                        try:
                            value = _index(arg)
                        except Exception:
                            raise self.percent_c_requires_int_or_char() from None
                        try:
                            fragment = chr(value)
                        except ValueError:
                            raise self.percent_c_not_in_range() from None
                        except OverflowError:
                            raise self.percent_c_overflow() from None
                        except Exception:
                            raise self.percent_c_requires_int_or_char() from None
                    _format_string(result, flags, width, precision, fragment)
                elif c is "d" or c is "i" or c is "u":  # noqa: F632
                    try:
                        # Raising TypeError here routes non-numbers to the
                        # same handler as a failing int() call.
                        if not _number_check(arg):
                            raise TypeError()
                        value = int(arg)
                    except TypeError:
                        tname = _type(arg).__name__
                        raise self.percent_d_a_number_is_required(c, tname) from None
                    # The sign is emitted separately so zero padding can be
                    # placed between it and the digits.
                    if value < 0:
                        value = -value
                        sign = "-"
                    else:
                        sign = positive_sign
                    fragment = int.__str__(value)
                    _format_number(result, flags, width, precision, sign, "", fragment)
                elif c is "x":  # noqa: F632
                    try:
                        if not _number_check(arg):
                            raise TypeError()
                        value = _index(arg)
                    except TypeError:
                        raise TypeError(
                            f"%{c} format: an integer is required, not {_type(arg).__name__}"
                        ) from None
                    if value < 0:
                        value = -value
                        sign = "-"
                    else:
                        sign = positive_sign
                    # "#" requests the base prefix.
                    prefix = "0x" if use_alt_formatting else ""
                    fragment = _int_format(value, "x")
                    _format_number(
                        result, flags, width, precision, sign, prefix, fragment
                    )
                elif c is "X":  # noqa: F632
                    try:
                        if not _number_check(arg):
                            raise TypeError()
                        value = _index(arg)
                    except TypeError:
                        raise TypeError(
                            f"%{c} format: an integer is required, not {_type(arg).__name__}"
                        ) from None
                    if value < 0:
                        value = -value
                        sign = "-"
                    else:
                        sign = positive_sign
                    prefix = "0X" if use_alt_formatting else ""
                    fragment = _int_format(value, "X")
                    _format_number(
                        result, flags, width, precision, sign, prefix, fragment
                    )
                elif c is "o":  # noqa: F632
                    try:
                        if not _number_check(arg):
                            raise TypeError()
                        value = _index(arg)
                    except TypeError:
                        tname = _type(arg).__name__
                        raise TypeError(
                            f"%o format: an integer is required, not {tname}"
                        ) from None
                    if value < 0:
                        value = -value
                        sign = "-"
                    else:
                        sign = positive_sign
                    prefix = "0o" if use_alt_formatting else ""
                    fragment = _int_format(value, "o")
                    _format_number(
                        result, flags, width, precision, sign, prefix, fragment
                    )
                elif c in "eEfFgG":
                    # On TypeError the exception object is kept in `value` so
                    # the subclass hook can decide what to raise.
                    try:
                        value = _float(arg)
                    except TypeError as float_exception:
                        value = float_exception
                    # TODO(T87283131) This is better handled with exception
                    # chaining, but it currently breaks tests
                    if not _float_check(value):
                        tname = _type(arg).__name__
                        raise self.must_be_real_number(value, tname)
                    # Default precision when none was given.
                    if precision < 0:
                        precision = 6
                    # The `value != value` test avoids emitting "-nan".
                    if _float_signbit(value) and not value != value:
                        sign = "-"
                    else:
                        sign = positive_sign
                    fragment = _float_format(
                        value, c, precision, True, False, use_alt_formatting
                    )
                    # Precision was already applied by `_float_format`, so 0
                    # is passed to `_format_number` here.
                    _format_number(result, flags, width, 0, sign, "", fragment)
                else:
                    raise ValueError(
                        f"unsupported format character '{c}' ({ord(c):#x}) at index {idx}"
                    )

                # The next literal run starts after the conversion character.
                begin = idx + 1
                in_specifier = False
        except StopIteration:
            # Make sure everyone called `idx += 1` after `it.__next__()`.
            assert idx + 1 == _str_len(string)

        # The iterator ran out while a specifier was still being parsed.
        if in_specifier:
            raise ValueError("incomplete format")
        # Copy the trailing literal text.
        _str_array_iadd(result, string[begin:])

        if arg_idx < args_len and args_dict is None:
            # Lazily check that the user did not specify an args dictionary and if
            # not raise an error:
            if _tuple_check(args) or _str_check(args) or not _mapping_check(args):
                raise self.not_all_arguments_converted()
        return self.cast(result.__str__())
414
415
class StringLikeFormatter(Formatter):
    """`Formatter` flavour for str format strings.

    Conversions are plain `str()` / `repr()`; the hooks below build the
    exceptions raised for invalid %c, %d and float arguments.
    """

    CATEGORY = "string"

    # The same staticmethod object serves as both `cast` and `as_str`.
    cast = staticmethod(str)
    as_str = cast
    as_repr = staticmethod(repr)

    def percent_c_not_in_range(self):
        """Return the OverflowError for a %c code point `chr()` rejects."""
        # Imported lazily, only when the error is actually built.
        from sys import maxunicode

        limit = maxunicode + 1
        message = "%c arg not in range({m:#x})".format(m=limit)
        return OverflowError(message)

    def percent_c_overflow(self):
        """Return the TypeError for a %c integer that overflows in `chr()`."""
        return TypeError("%c requires int or char")

    def percent_c_requires_int_or_char(self):
        """Return the TypeError for a %c argument that is not an int or char."""
        return TypeError("%c requires int or char")

    def percent_d_a_number_is_required(self, c, tname):
        """Return the TypeError for a non-numeric %d/%i/%u argument."""
        message = f"%{c} format: a number is required, not {tname}"
        return TypeError(message)

    def must_be_real_number(self, float_exception, tname):
        """Hand back the object received from the failed float conversion."""
        return float_exception
437
438
class BytesLikeFormatter(Formatter):
    """`Formatter` flavour for bytes format strings.

    The format string and %s arguments are decoded to str so that the shared
    `Formatter.format` can process them; `cast` encodes the result (and
    mapping keys) back to bytes using UTF-8.
    """

    CATEGORY = "bytes"

    @staticmethod
    def cast(s):
        """Return `s` as bytes; str input is encoded as UTF-8."""
        if _str_check(s):
            return bytes(s, "utf-8")
        return bytes(s)

    @staticmethod
    def as_str(s):
        """Decode `s` to str.

        bytes objects are decoded directly; anything else is first passed
        through `bytes()`.

        Raises:
            TypeError: if the conversion or decoding raises TypeError.
        """
        # NOTE(review): `bytes(s)` presumably also accepts ints and iterables
        # of ints, which would then be formatted instead of rejected — confirm
        # whether that is intended for %s arguments.
        try:
            if _bytes_check(s):
                return s.decode("utf-8")
            return bytes(s).decode()
        except TypeError:
            raise TypeError(
                f"%b requires a bytes-like object, or an object that implements __bytes__, not '{_type(s).__name__}'"
            )

    @staticmethod
    def as_repr(arg):
        """Return `repr(arg)` with characters above U+00FF written as \\UXXXXXXXX."""
        fragment = repr(arg)
        return "".join(c if c <= "\xff" else f"\\U{ord(c):08x}" for c in fragment)

    def percent_c_not_in_range(self):
        """Return the OverflowError for a %c value `chr()` rejects."""
        # Return the exception rather than raising it, like every other hook:
        # the caller does `raise self.percent_c_not_in_range() from None`, and
        # raising in here would bypass that `from None`, leaving the internal
        # ValueError attached as context.
        return OverflowError("%c arg not in range(256)")

    def percent_c_overflow(self):
        """Return the OverflowError for a %c integer that overflows in `chr()`."""
        return OverflowError("%c arg not in range(256)")

    def percent_c_requires_int_or_char(self):
        """Return the TypeError for a %c argument that is not an int or char."""
        return TypeError("%c requires an integer in range(256) or a single byte")

    def percent_d_a_number_is_required(self, c, tname):
        """Return the TypeError for a non-numeric %d/%i/%u argument.

        The message reports %i as %d.
        """
        if c == "i":
            c = "d"
        return TypeError(f"%{c} format: a number is required, not {tname}")

    def must_be_real_number(self, float_exception, tname):
        """Return the TypeError for a float conversion given a non-float argument."""
        return TypeError(f"float argument required, not {tname}")
480
481
# Module-level entry points: the bound `format` methods of one shared
# formatter instance per flavour.
str_format = StringLikeFormatter().format
bytes_format = BytesLikeFormatter().format