runtime/bytes-builtins.h at trunk · bernsteinbear.com/skybison

bernsteinbear.com / skybison
fork atom
this repo has no description
fork atom
skybison / runtime / bytes-builtins.h
at trunk 83 lines 3.8 kB view raw
wrap content
Max Bernstein Add license headers 4y ago
29d072a3
 1/* Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com) */
 2#pragma once
 3
 4#include "objects.h"
 5#include "runtime.h"
 6
 7namespace py {
 8
 // Length of a byte translation table: kMaxByte + 1 entries (presumably one
 // entry per possible byte value; kMaxByte is defined elsewhere -- confirm).
 9const word kByteTranslationTableLength = kMaxByte + 1;
10
11// Counts distinct occurrences of needle in haystack in the range [start, end).
12word bytesCount(const Bytes& haystack, word haystack_len, const Bytes& needle,
13                word needle_len, word start, word end);
14
15// Returns a Str object if each byte in bytes is ASCII, else Unbound.
16RawObject bytesDecodeASCII(Thread* thread, const Bytes& bytes);
17
18// Looks for needle in haystack in the range [start, end). Returns the first
19// starting index found in that range, or -1 if the needle was not found.
20word bytesFind(const Bytes& haystack, word haystack_len, const Bytes& needle,
21               word needle_len, word start, word end);
22
// Returns the hash of a bytes object. The inline definition later in this
// header uses SmallBytes::hash() for small bytes and
// thread->runtime()->valueHash() for large bytes.
23word bytesHash(Thread* thread, RawObject object);
24
25// Converts the bytes into a string, mapping each byte to two hex characters.
26RawObject bytesHex(Thread* thread, const Bytes& bytes, word length);
27
28// Concatenates an iterable of bytes-like objects with a separator. Returns
29// Bytes or MutableBytes, depending on `sep`'s type.
30//
31// Raises TypeError if any of the items in `src` are not byteslike.
32RawObject bytesJoin(Thread* thread, const Bytes& sep, word sep_length,
33                    const Tuple& src, word src_length);
34
35// Like `bytesFind`, but returns the last starting index in [start, end) or -1.
36word bytesRFind(const Bytes& haystack, word haystack_len, const Bytes& needle,
37                word needle_len, word start, word end);
38
39// Converts bytes into a string representation with single quote delimiters.
40RawObject bytesReprSingleQuotes(Thread* thread, const Bytes& bytes);
41
42// Split bytes into logical lines using \r, \n, or \r\n markers.
43// keepends == true keeps the newline characters, keepends == false does not.
44// Returns a list with a bytes object for each line.
45RawObject bytesSplitLines(Thread* thread, const Bytes& bytes, word length,
46                          bool keepends);
47
48// Strips the given characters from the end(s) of the given bytes. For left and
49// right variants, strips only the specified side. For space variants, strips
50// all ASCII whitespace from the specified side(s).
51RawObject bytesStrip(Thread* thread, const Bytes& bytes, word bytes_len,
52                     const Bytes& chars, word chars_len);
53RawObject bytesStripLeft(Thread* thread, const Bytes& bytes, word bytes_len,
54                         const Bytes& chars, word chars_len);
55RawObject bytesStripRight(Thread* thread, const Bytes& bytes, word bytes_len,
56                          const Bytes& chars, word chars_len);
57RawObject bytesStripSpace(Thread* thread, const Bytes& bytes, word len);
58RawObject bytesStripSpaceLeft(Thread* thread, const Bytes& bytes, word len);
59RawObject bytesStripSpaceRight(Thread* thread, const Bytes& bytes, word len);
60
61// Returns a new Bytes containing the Bytes or MutableBytes subsequence of
62// bytes with the given start index and length.
63RawObject bytesSubseq(Thread* thread, const Bytes& bytes, word start,
64                      word length);
65
// Reports whether bytes is valid UTF-8. NOTE(review): judging by the
// bytesIsValidStr comment below, this variant presumably rejects codepoints
// from the surrogate range -- confirm against the implementation.
66bool bytesIsValidUTF8(RawBytes bytes);
67
68// Test whether bytes are valid UTF-8 except that it also allows codepoints
69// from the surrogate range which is technically not valid UTF-8 but allowed
70// in strings, because python supports things like UTF-8B (aka surrogateescape).
71bool bytesIsValidStr(RawBytes bytes);
72
73void initializeBytesTypes(Thread* thread);
74
75inline word bytesHash(Thread* thread, RawObject object) {
76  if (object.isSmallBytes()) {
77    return SmallBytes::cast(object).hash();
78  }
79  DCHECK(object.isLargeBytes(), "expected bytes object");
80  return thread->runtime()->valueHash(object);
81}
82
83}  // namespace py