this repo has no description
at trunk 83 lines 3.8 kB view raw
1/* Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com) */ 2#pragma once 3 4#include "objects.h" 5#include "runtime.h" 6 7namespace py { 8 9const word kByteTranslationTableLength = kMaxByte + 1; 10 11// Counts distinct occurrences of needle in haystack in the range [start, end). 12word bytesCount(const Bytes& haystack, word haystack_len, const Bytes& needle, 13 word needle_len, word start, word end); 14 15// Returns a Str object if each byte in bytes is ascii, else Unbound 16RawObject bytesDecodeASCII(Thread* thread, const Bytes& bytes); 17 18// Looks for needle in haystack in the range [start, end). Returns the first 19// starting index found in that range, or -1 if the needle was not found. 20word bytesFind(const Bytes& haystack, word haystack_len, const Bytes& needle, 21 word needle_len, word start, word end); 22 23word bytesHash(Thread* thread, RawObject object); 24 25// Converts the bytes into a string, mapping each byte to two hex characters. 26RawObject bytesHex(Thread* thread, const Bytes& bytes, word length); 27 28// Concatenates an iterable of bytes-like objects with a separator. Returns 29// Bytes or MutableBytes, depending on `sep`'s type. 30// 31// Raises TypeError if any of the items in `src` are not byteslike. 32RawObject bytesJoin(Thread* thread, const Bytes& sep, word sep_length, 33 const Tuple& src, word src_length); 34 35// Like `bytesFind`, but returns the last starting index in [start, end) or -1. 36word bytesRFind(const Bytes& haystack, word haystack_len, const Bytes& needle, 37 word needle_len, word start, word end); 38 39// Converts bytes into a string representation with single quote delimiters. 40RawObject bytesReprSingleQuotes(Thread* thread, const Bytes& bytes); 41 42// Split bytes into logical lines using \r, \n, or \r\n markers. 43// keepends == true keeps the newline characters, keepends == false does not. 44// Returns a list with a bytes objects for each line. 45RawObject bytesSplitLines(Thread* thread, const Bytes& bytes, word length, 46 bool keepends); 47 48// Strips the given characters from the end(s) of the given bytes. For left and 49// right variants, strips only the specified side. For space variants, strips 50// all ASCII whitespace from the specified side(s). 51RawObject bytesStrip(Thread* thread, const Bytes& bytes, word bytes_len, 52 const Bytes& chars, word chars_len); 53RawObject bytesStripLeft(Thread* thread, const Bytes& bytes, word bytes_len, 54 const Bytes& chars, word chars_len); 55RawObject bytesStripRight(Thread* thread, const Bytes& bytes, word bytes_len, 56 const Bytes& chars, word chars_len); 57RawObject bytesStripSpace(Thread* thread, const Bytes& bytes, word len); 58RawObject bytesStripSpaceLeft(Thread* thread, const Bytes& bytes, word len); 59RawObject bytesStripSpaceRight(Thread* thread, const Bytes& bytes, word len); 60 61// Returns a new Bytes containing the Bytes or MutableBytes subsequence of 62// bytes with the given start index and length. 63RawObject bytesSubseq(Thread* thread, const Bytes& bytes, word start, 64 word length); 65 66bool bytesIsValidUTF8(RawBytes bytes); 67 68// Test whether bytes are valid UTF-8 except that it also allows codepoints 69// from the surrogate range which is technically not valid UTF-8 but allowed 70// in strings, because python supports things like UTF-8B (aka surrogateescape). 71bool bytesIsValidStr(RawBytes bytes); 72 73void initializeBytesTypes(Thread* thread); 74 75inline word bytesHash(Thread* thread, RawObject object) { 76 if (object.isSmallBytes()) { 77 return SmallBytes::cast(object).hash(); 78 } 79 DCHECK(object.isLargeBytes(), "expected bytes object"); 80 return thread->runtime()->valueHash(object); 81} 82 83} // namespace py