/* Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com) */
#pragma once

#include "objects.h"
#include "runtime.h"

namespace py {

// Length of a byte translation table: kMaxByte + 1 entries, which is enough
// for one entry per value in [0, kMaxByte].
const word kByteTranslationTableLength = kMaxByte + 1;

// Counts distinct occurrences of needle in haystack in the range [start, end).
word bytesCount(const Bytes& haystack, word haystack_len, const Bytes& needle,
                word needle_len, word start, word end);

// Returns a Str object if each byte in bytes is ascii, else Unbound
RawObject bytesDecodeASCII(Thread* thread, const Bytes& bytes);

// Looks for needle in haystack in the range [start, end). Returns the first
// starting index found in that range, or -1 if the needle was not found.
word bytesFind(const Bytes& haystack, word haystack_len, const Bytes& needle,
               word needle_len, word start, word end);

// Returns the hash of a bytes object. `object` is expected to be a SmallBytes
// or a LargeBytes. Defined inline later in this header.
word bytesHash(Thread* thread, RawObject object);

// Converts the bytes into a string, mapping each byte to two hex characters.
RawObject bytesHex(Thread* thread, const Bytes& bytes, word length);

// Concatenates an iterable of bytes-like objects with a separator. Returns
// Bytes or MutableBytes, depending on `sep`'s type.
//
// Raises TypeError if any of the items in `src` are not byteslike.
RawObject bytesJoin(Thread* thread, const Bytes& sep, word sep_length,
                    const Tuple& src, word src_length);

// Like `bytesFind`, but returns the last starting index in [start, end) or -1.
word bytesRFind(const Bytes& haystack, word haystack_len, const Bytes& needle,
                word needle_len, word start, word end);

// Converts bytes into a string representation with single quote delimiters.
RawObject bytesReprSingleQuotes(Thread* thread, const Bytes& bytes);

// Split bytes into logical lines using \r, \n, or \r\n markers.
// keepends == true keeps the newline characters, keepends == false does not.
// Returns a list with a bytes object for each line.
RawObject bytesSplitLines(Thread* thread, const Bytes& bytes, word length,
                          bool keepends);

// Strips the given characters from the end(s) of the given bytes. For left and
// right variants, strips only the specified side. For space variants, strips
// all ASCII whitespace from the specified side(s).
RawObject bytesStrip(Thread* thread, const Bytes& bytes, word bytes_len,
                     const Bytes& chars, word chars_len);
RawObject bytesStripLeft(Thread* thread, const Bytes& bytes, word bytes_len,
                         const Bytes& chars, word chars_len);
RawObject bytesStripRight(Thread* thread, const Bytes& bytes, word bytes_len,
                          const Bytes& chars, word chars_len);
RawObject bytesStripSpace(Thread* thread, const Bytes& bytes, word len);
RawObject bytesStripSpaceLeft(Thread* thread, const Bytes& bytes, word len);
RawObject bytesStripSpaceRight(Thread* thread, const Bytes& bytes, word len);

// Returns a new Bytes containing the Bytes or MutableBytes subsequence of
// bytes with the given start index and length.
RawObject bytesSubseq(Thread* thread, const Bytes& bytes, word start,
                      word length);

// Tests whether bytes is valid UTF-8. Compare bytesIsValidStr, which
// additionally allows codepoints from the surrogate range.
bool bytesIsValidUTF8(RawBytes bytes);

// Tests whether bytes is valid UTF-8, except that codepoints from the
// surrogate range are also accepted. Those are technically not valid UTF-8 but
// are allowed in strings, because Python supports things like UTF-8B (aka
// surrogateescape).
bool bytesIsValidStr(RawBytes bytes);

void initializeBytesTypes(Thread* thread);

// Hash of a bytes object. A SmallBytes value produces its hash directly; any
// other object is asserted (via DCHECK) to be a LargeBytes and is hashed
// through the runtime's valueHash.
inline word bytesHash(Thread* thread, RawObject object) {
  if (!object.isSmallBytes()) {
    DCHECK(object.isLargeBytes(), "expected bytes object");
    return thread->runtime()->valueHash(object);
  }
  return SmallBytes::cast(object).hash();
}

}  // namespace py