this repo has no description
1/* Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com) */
2#pragma once
3
4#include "objects.h"
5#include "runtime.h"
6
7namespace py {
8
// Length of a byte translation table: kMaxByte + 1 entries.
// NOTE(review): presumably one entry per possible byte value -- confirm
// against the definition of kMaxByte.
9const word kByteTranslationTableLength = kMaxByte + 1;
10
11// Counts distinct occurrences of needle in haystack in the range [start, end).
// NOTE(review): "distinct" presumably means non-overlapping matches, and
// haystack_len / needle_len presumably give the number of bytes to consider
// in each argument -- confirm against the definition.
12word bytesCount(const Bytes& haystack, word haystack_len, const Bytes& needle,
13 word needle_len, word start, word end);
14
15// Returns a Str object if each byte in bytes is ASCII, else Unbound.
// NOTE(review): the Unbound result presumably signals "not pure ASCII" without
// raising an exception -- confirm against the definition.
16RawObject bytesDecodeASCII(Thread* thread, const Bytes& bytes);
17
18// Looks for needle in haystack in the range [start, end). Returns the first
19// starting index found in that range, or -1 if the needle was not found.
// See bytesRFind for the variant that reports the last match instead.
20word bytesFind(const Bytes& haystack, word haystack_len, const Bytes& needle,
21 word needle_len, word start, word end);
22
// Returns the hash of a bytes object. `object` must be a SmallBytes or a
// LargeBytes; the inline definition appears later in this header.
23word bytesHash(Thread* thread, RawObject object);
24
25// Converts the bytes into a string, mapping each byte to two hex characters.
// NOTE(review): `length` is presumably the number of bytes of `bytes` to
// convert -- confirm against the definition.
26RawObject bytesHex(Thread* thread, const Bytes& bytes, word length);
27
28// Concatenates an iterable of bytes-like objects with a separator. Returns
29// Bytes or MutableBytes, depending on `sep`'s type.
30//
31// Raises TypeError if any of the items in `src` are not bytes-like.
// NOTE(review): sep_length and src_length are presumably the number of bytes
// of `sep` and the number of items of `src` to use -- confirm against the
// definition.
32RawObject bytesJoin(Thread* thread, const Bytes& sep, word sep_length,
33 const Tuple& src, word src_length);
34
35// Like `bytesFind`, but returns the last starting index in [start, end), or
// -1 if the needle was not found in that range.
36word bytesRFind(const Bytes& haystack, word haystack_len, const Bytes& needle,
37 word needle_len, word start, word end);
38
39// Converts bytes into a string representation with single quote delimiters.
// NOTE(review): how a single quote inside the bytes is escaped is not visible
// from this declaration; see the definition.
40RawObject bytesReprSingleQuotes(Thread* thread, const Bytes& bytes);
41
42// Splits bytes into logical lines using \r, \n, or \r\n markers.
43// keepends == true keeps the newline characters, keepends == false does not.
44// Returns a list with one bytes object for each line.
// NOTE(review): `length` is presumably the number of bytes of `bytes` to scan
// -- confirm against the definition.
45RawObject bytesSplitLines(Thread* thread, const Bytes& bytes, word length,
46 bool keepends);
47
48// Strips the given characters from the end(s) of the given bytes. For left and
49// right variants, strips only the specified side. For space variants, strips
50// all ASCII whitespace from the specified side(s).
// NOTE(review): bytes_len, chars_len and len are presumably the byte counts of
// the corresponding arguments -- confirm against the definitions.
51RawObject bytesStrip(Thread* thread, const Bytes& bytes, word bytes_len,
52 const Bytes& chars, word chars_len);
53RawObject bytesStripLeft(Thread* thread, const Bytes& bytes, word bytes_len,
54 const Bytes& chars, word chars_len);
55RawObject bytesStripRight(Thread* thread, const Bytes& bytes, word bytes_len,
56 const Bytes& chars, word chars_len);
57RawObject bytesStripSpace(Thread* thread, const Bytes& bytes, word len);
58RawObject bytesStripSpaceLeft(Thread* thread, const Bytes& bytes, word len);
59RawObject bytesStripSpaceRight(Thread* thread, const Bytes& bytes, word len);
60
61// Returns a new Bytes holding the subsequence of bytes (a Bytes or
62// MutableBytes) that begins at index start and spans length bytes.
63RawObject bytesSubseq(Thread* thread, const Bytes& bytes, word start,
64 word length);
65
// NOTE(review): presumably reports whether `bytes` is well-formed UTF-8. Going
// by the comment on bytesIsValidStr, this check does not accept code points
// from the surrogate range -- confirm against the definition.
66bool bytesIsValidUTF8(RawBytes bytes);
67
68// Tests whether bytes are valid UTF-8, except that it also allows code points
69// from the surrogate range, which is technically not valid UTF-8 but allowed
70// in strings because Python supports things like UTF-8B (aka surrogateescape).
71bool bytesIsValidStr(RawBytes bytes);
72
// NOTE(review): presumably sets up the bytes-related types in the runtime
// during initialization -- confirm against the definition.
73void initializeBytesTypes(Thread* thread);
74
75inline word bytesHash(Thread* thread, RawObject object) {
76 if (object.isSmallBytes()) {
77 return SmallBytes::cast(object).hash();
78 }
79 DCHECK(object.isLargeBytes(), "expected bytes object");
80 return thread->runtime()->valueHash(object);
81}
82
83} // namespace py