The open source OpenXR runtime
1/*
2 LZ4 - Fast LZ compression algorithm
3 Copyright (C) 2011-2020, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above
14 copyright notice, this list of conditions and the following disclaimer
15 in the documentation and/or other materials provided with the
16 distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 You can contact the author at :
31 - LZ4 homepage : http://www.lz4.org
32 - LZ4 source repository : https://github.com/lz4/lz4
33*/
34
35/*-************************************
36* Tuning parameters
37**************************************/
38/*
39 * LZ4_HEAPMODE :
40 * Select how default compression functions will allocate memory for their hash table,
41 * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
42 */
43#ifndef LZ4_HEAPMODE
44# define LZ4_HEAPMODE 0
45#endif
46
47/*
48 * LZ4_ACCELERATION_DEFAULT :
49 * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
50 */
51#define LZ4_ACCELERATION_DEFAULT 1
52/*
53 * LZ4_ACCELERATION_MAX :
54 * Any "acceleration" value higher than this threshold
55 * get treated as LZ4_ACCELERATION_MAX instead (fix #876)
56 */
57#define LZ4_ACCELERATION_MAX 65537
58
59
60/*-************************************
61* CPU Feature Detection
62**************************************/
63/* LZ4_FORCE_MEMORY_ACCESS
64 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
65 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
66 * The below switch allow to select different access method for improved performance.
67 * Method 0 (default) : use `memcpy()`. Safe and portable.
68 * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
69 * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
70 * Method 2 : direct access. This method is portable but violate C standard.
71 * It can generate buggy code on targets which assembly generation depends on alignment.
72 * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
73 * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
74 * Prefer these methods in priority order (0 > 1 > 2)
75 */
76#ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */
77# if defined(__GNUC__) && \
78 ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
79 || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
80# define LZ4_FORCE_MEMORY_ACCESS 2
81# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
82# define LZ4_FORCE_MEMORY_ACCESS 1
83# endif
84#endif
85
86/*
87 * LZ4_FORCE_SW_BITCOUNT
88 * Define this parameter if your target system or compiler does not support hardware bit count
89 */
90#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for WinCE doesn't support Hardware bit count */
91# undef LZ4_FORCE_SW_BITCOUNT /* avoid double def */
92# define LZ4_FORCE_SW_BITCOUNT
93#endif
94
95
96
97/*-************************************
98* Dependency
99**************************************/
100/*
101 * LZ4_SRC_INCLUDED:
102 * Amalgamation flag, whether lz4.c is included
103 */
104#ifndef LZ4_SRC_INCLUDED
105# define LZ4_SRC_INCLUDED 1
106#endif
107
108#ifndef LZ4_STATIC_LINKING_ONLY
109#define LZ4_STATIC_LINKING_ONLY
110#endif
111
112#ifndef LZ4_DISABLE_DEPRECATE_WARNINGS
113#define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
114#endif
115
116#define LZ4_STATIC_LINKING_ONLY /* LZ4_DISTANCE_MAX */
117#include "tracy_lz4.hpp"
118/* see also "memory routines" below */
119
120
121/*-************************************
122* Compiler Options
123**************************************/
124#if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Visual Studio 2005+ */
125# include <intrin.h> /* only present in VS2005+ */
126# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
127# pragma warning(disable : 6237) /* disable: C6237: conditional expression is always 0 */
128#endif /* _MSC_VER */
129
130#ifndef LZ4_FORCE_INLINE
131# ifdef _MSC_VER /* Visual Studio */
132# define LZ4_FORCE_INLINE static __forceinline
133# else
134# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
135# ifdef __GNUC__
136# define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
137# else
138# define LZ4_FORCE_INLINE static inline
139# endif
140# else
141# define LZ4_FORCE_INLINE static
142# endif /* __STDC_VERSION__ */
143# endif /* _MSC_VER */
144#endif /* LZ4_FORCE_INLINE */
145
146/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
147 * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
148 * together with a simple 8-byte copy loop as a fall-back path.
149 * However, this optimization hurts the decompression speed by >30%,
150 * because the execution does not go to the optimized loop
151 * for typical compressible data, and all of the preamble checks
152 * before going to the fall-back path become useless overhead.
153 * This optimization happens only with the -O3 flag, and -O2 generates
154 * a simple 8-byte copy loop.
155 * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
156 * functions are annotated with __attribute__((optimize("O2"))),
157 * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
158 * of LZ4_wildCopy8 does not affect the compression speed.
159 */
160#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
161# define LZ4_FORCE_O2 __attribute__((optimize("O2")))
162# undef LZ4_FORCE_INLINE
163# define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline))
164#else
165# define LZ4_FORCE_O2
166#endif
167
168#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
169# define expect(expr,value) (__builtin_expect ((expr),(value)) )
170#else
171# define expect(expr,value) (expr)
172#endif
173
174#ifndef likely
175#define likely(expr) expect((expr) != 0, 1)
176#endif
177#ifndef unlikely
178#define unlikely(expr) expect((expr) != 0, 0)
179#endif
180
181/* Should the alignment test prove unreliable, for some reason,
182 * it can be disabled by setting LZ4_ALIGN_TEST to 0 */
183#ifndef LZ4_ALIGN_TEST /* can be externally provided */
184# define LZ4_ALIGN_TEST 1
185#endif
186
187
188/*-************************************
189* Memory routines
190**************************************/
191
192/*! LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION :
193 * Disable relatively high-level LZ4/HC functions that use dynamic memory
194 * allocation functions (malloc(), calloc(), free()).
195 *
196 * Note that this is a compile-time switch. And since it disables
197 * public/stable LZ4 v1 API functions, we don't recommend using this
198 * symbol to generate a library for distribution.
199 *
200 * The following public functions are removed when this symbol is defined.
201 * - lz4 : LZ4_createStream, LZ4_freeStream,
202 * LZ4_createStreamDecode, LZ4_freeStreamDecode, LZ4_create (deprecated)
203 * - lz4hc : LZ4_createStreamHC, LZ4_freeStreamHC,
204 * LZ4_createHC (deprecated), LZ4_freeHC (deprecated)
205 * - lz4frame, lz4file : All LZ4F_* functions
206 */
207#if defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
208# define ALLOC(s) lz4_error_memory_allocation_is_disabled
209# define ALLOC_AND_ZERO(s) lz4_error_memory_allocation_is_disabled
210# define FREEMEM(p) lz4_error_memory_allocation_is_disabled
211#elif defined(LZ4_USER_MEMORY_FUNCTIONS)
212/* memory management functions can be customized by user project.
213 * Below functions must exist somewhere in the Project
214 * and be available at link time */
215void* LZ4_malloc(size_t s);
216void* LZ4_calloc(size_t n, size_t s);
217void LZ4_free(void* p);
218# define ALLOC(s) LZ4_malloc(s)
219# define ALLOC_AND_ZERO(s) LZ4_calloc(1,s)
220# define FREEMEM(p) LZ4_free(p)
221#else
222# include <stdlib.h> /* malloc, calloc, free */
223# define ALLOC(s) malloc(s)
224# define ALLOC_AND_ZERO(s) calloc(1,s)
225# define FREEMEM(p) free(p)
226#endif
227
228#if ! LZ4_FREESTANDING
229# include <string.h> /* memset, memcpy */
230#endif
231#if !defined(LZ4_memset)
232# define LZ4_memset(p,v,s) memset((p),(v),(s))
233#endif
234#define MEM_INIT(p,v,s) LZ4_memset((p),(v),(s))
235
236
237/*-************************************
238* Common Constants
239**************************************/
240#define MINMATCH 4
241
242#define WILDCOPYLENGTH 8
243#define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */
244#define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */
245#define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
246#define FASTLOOP_SAFE_DISTANCE 64
247static const int LZ4_minLength = (MFLIMIT+1);
248
249#define KB *(1 <<10)
250#define MB *(1 <<20)
251#define GB *(1U<<30)
252
253#define LZ4_DISTANCE_ABSOLUTE_MAX 65535
254#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX) /* max supported by LZ4 format */
255# error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
256#endif
257
258#define ML_BITS 4
259#define ML_MASK ((1U<<ML_BITS)-1)
260#define RUN_BITS (8-ML_BITS)
261#define RUN_MASK ((1U<<RUN_BITS)-1)
262
263
264/*-************************************
265* Error detection
266**************************************/
267#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
268# include <assert.h>
269#else
270# ifndef assert
271# define assert(condition) ((void)0)
272# endif
273#endif
274
275#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use after variable declarations */
276
277#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
278# include <stdio.h>
279 static int g_debuglog_enable = 1;
280# define DEBUGLOG(l, ...) { \
281 if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \
282 fprintf(stderr, __FILE__ ": "); \
283 fprintf(stderr, __VA_ARGS__); \
284 fprintf(stderr, " \n"); \
285 } }
286#else
287# define DEBUGLOG(l, ...) {} /* disabled */
288#endif
289
290static int LZ4_isAligned(const void* ptr, size_t alignment)
291{
292 return ((size_t)ptr & (alignment -1)) == 0;
293}
294
295
296/*-************************************
297* Types
298**************************************/
299#include <limits.h>
300#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
301# include <stdint.h>
302 typedef uint8_t BYTE;
303 typedef uint16_t U16;
304 typedef uint32_t U32;
305 typedef int32_t S32;
306 typedef uint64_t U64;
307 typedef uintptr_t uptrval;
308#else
309# if UINT_MAX != 4294967295UL
310# error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
311# endif
312 typedef unsigned char BYTE;
313 typedef unsigned short U16;
314 typedef unsigned int U32;
315 typedef signed int S32;
316 typedef unsigned long long U64;
317 typedef size_t uptrval; /* generally true, except OpenVMS-64 */
318#endif
319
320#if defined(__x86_64__)
321 typedef U64 reg_t; /* 64-bits in x32 mode */
322#else
323 typedef size_t reg_t; /* 32-bits in x32 mode */
324#endif
325
326typedef enum {
327 notLimited = 0,
328 limitedOutput = 1,
329 fillOutput = 2
330} limitedOutput_directive;
331
332namespace tracy
333{
334
335/*-************************************
336* Reading and writing into memory
337**************************************/
338
339/**
340 * LZ4 relies on memcpy with a constant size being inlined. In freestanding
341 * environments, the compiler can't assume the implementation of memcpy() is
342 * standard compliant, so it can't apply its specialized memcpy() inlining
343 * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
344 * memcpy() as if it were standard compliant, so it can inline it in freestanding
345 * environments. This is needed when decompressing the Linux Kernel, for example.
346 */
347#if !defined(LZ4_memcpy)
348# if defined(__GNUC__) && (__GNUC__ >= 4)
349# define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
350# else
351# define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
352# endif
353#endif
354
355#if !defined(LZ4_memmove)
356# if defined(__GNUC__) && (__GNUC__ >= 4)
357# define LZ4_memmove __builtin_memmove
358# else
359# define LZ4_memmove memmove
360# endif
361#endif
362
363static unsigned LZ4_isLittleEndian(void)
364{
365 const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
366 return one.c[0];
367}
368
369
370#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
371/* lie to the compiler about data alignment; use with caution */
372
373static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
374static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
375static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; }
376
377static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
378static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
379
380#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
381
382/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
383/* currently only defined for gcc and icc */
384typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) LZ4_unalign;
385
386static U16 LZ4_read16(const void* ptr) { return ((const LZ4_unalign*)ptr)->u16; }
387static U32 LZ4_read32(const void* ptr) { return ((const LZ4_unalign*)ptr)->u32; }
388static reg_t LZ4_read_ARCH(const void* ptr) { return ((const LZ4_unalign*)ptr)->uArch; }
389
390static void LZ4_write16(void* memPtr, U16 value) { ((LZ4_unalign*)memPtr)->u16 = value; }
391static void LZ4_write32(void* memPtr, U32 value) { ((LZ4_unalign*)memPtr)->u32 = value; }
392
393#else /* safe and portable access using memcpy() */
394
395static U16 LZ4_read16(const void* memPtr)
396{
397 U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
398}
399
400static U32 LZ4_read32(const void* memPtr)
401{
402 U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
403}
404
405static reg_t LZ4_read_ARCH(const void* memPtr)
406{
407 reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
408}
409
410static void LZ4_write16(void* memPtr, U16 value)
411{
412 LZ4_memcpy(memPtr, &value, sizeof(value));
413}
414
415static void LZ4_write32(void* memPtr, U32 value)
416{
417 LZ4_memcpy(memPtr, &value, sizeof(value));
418}
419
420#endif /* LZ4_FORCE_MEMORY_ACCESS */
421
422
423static U16 LZ4_readLE16(const void* memPtr)
424{
425 if (LZ4_isLittleEndian()) {
426 return LZ4_read16(memPtr);
427 } else {
428 const BYTE* p = (const BYTE*)memPtr;
429 return (U16)((U16)p[0] + (p[1]<<8));
430 }
431}
432
433static void LZ4_writeLE16(void* memPtr, U16 value)
434{
435 if (LZ4_isLittleEndian()) {
436 LZ4_write16(memPtr, value);
437 } else {
438 BYTE* p = (BYTE*)memPtr;
439 p[0] = (BYTE) value;
440 p[1] = (BYTE)(value>>8);
441 }
442}
443
444/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
445LZ4_FORCE_INLINE
446void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
447{
448 BYTE* d = (BYTE*)dstPtr;
449 const BYTE* s = (const BYTE*)srcPtr;
450 BYTE* const e = (BYTE*)dstEnd;
451
452 do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
453}
454
455static const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
456static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
457
458
459#ifndef LZ4_FAST_DEC_LOOP
460# if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
461# define LZ4_FAST_DEC_LOOP 1
462# elif defined(__aarch64__) && defined(__APPLE__)
463# define LZ4_FAST_DEC_LOOP 1
464# elif defined(__aarch64__) && !defined(__clang__)
465 /* On non-Apple aarch64, we disable this optimization for clang because
466 * on certain mobile chipsets, performance is reduced with clang. For
467 * more information refer to https://github.com/lz4/lz4/pull/707 */
468# define LZ4_FAST_DEC_LOOP 1
469# else
470# define LZ4_FAST_DEC_LOOP 0
471# endif
472#endif
473
474#if LZ4_FAST_DEC_LOOP
475
476LZ4_FORCE_INLINE void
477LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
478{
479 assert(srcPtr + offset == dstPtr);
480 if (offset < 8) {
481 LZ4_write32(dstPtr, 0); /* silence an msan warning when offset==0 */
482 dstPtr[0] = srcPtr[0];
483 dstPtr[1] = srcPtr[1];
484 dstPtr[2] = srcPtr[2];
485 dstPtr[3] = srcPtr[3];
486 srcPtr += inc32table[offset];
487 LZ4_memcpy(dstPtr+4, srcPtr, 4);
488 srcPtr -= dec64table[offset];
489 dstPtr += 8;
490 } else {
491 LZ4_memcpy(dstPtr, srcPtr, 8);
492 dstPtr += 8;
493 srcPtr += 8;
494 }
495
496 LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
497}
498
499/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
500 * this version copies two times 16 bytes (instead of one time 32 bytes)
501 * because it must be compatible with offsets >= 16. */
502LZ4_FORCE_INLINE void
503LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
504{
505 BYTE* d = (BYTE*)dstPtr;
506 const BYTE* s = (const BYTE*)srcPtr;
507 BYTE* const e = (BYTE*)dstEnd;
508
509 do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
510}
511
512/* LZ4_memcpy_using_offset() presumes :
513 * - dstEnd >= dstPtr + MINMATCH
514 * - there is at least 8 bytes available to write after dstEnd */
515LZ4_FORCE_INLINE void
516LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
517{
518 BYTE v[8];
519
520 assert(dstEnd >= dstPtr + MINMATCH);
521
522 switch(offset) {
523 case 1:
524 MEM_INIT(v, *srcPtr, 8);
525 break;
526 case 2:
527 LZ4_memcpy(v, srcPtr, 2);
528 LZ4_memcpy(&v[2], srcPtr, 2);
529#if defined(_MSC_VER) && (_MSC_VER <= 1933) /* MSVC 2022 ver 17.3 or earlier */
530# pragma warning(push)
531# pragma warning(disable : 6385) /* warning C6385: Reading invalid data from 'v'. */
532#endif
533 LZ4_memcpy(&v[4], v, 4);
534#if defined(_MSC_VER) && (_MSC_VER <= 1933) /* MSVC 2022 ver 17.3 or earlier */
535# pragma warning(pop)
536#endif
537 break;
538 case 4:
539 LZ4_memcpy(v, srcPtr, 4);
540 LZ4_memcpy(&v[4], srcPtr, 4);
541 break;
542 default:
543 LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
544 return;
545 }
546
547 LZ4_memcpy(dstPtr, v, 8);
548 dstPtr += 8;
549 while (dstPtr < dstEnd) {
550 LZ4_memcpy(dstPtr, v, 8);
551 dstPtr += 8;
552 }
553}
554#endif
555
556
557/*-************************************
558* Common functions
559**************************************/
560LZ4_FORCE_INLINE unsigned LZ4_NbCommonBytes (reg_t val)
561{
562 assert(val != 0);
563 if (LZ4_isLittleEndian()) {
564 if (sizeof(val) == 8) {
565# if defined(_MSC_VER) && (_MSC_VER >= 1800) && (defined(_M_AMD64) && !defined(_M_ARM64EC)) && !defined(LZ4_FORCE_SW_BITCOUNT)
566/*-*************************************************************************************************
567* ARM64EC is a Microsoft-designed ARM64 ABI compatible with AMD64 applications on ARM64 Windows 11.
568* The ARM64EC ABI does not support AVX/AVX2/AVX512 instructions, nor their relevant intrinsics
569* including _tzcnt_u64. Therefore, we need to neuter the _tzcnt_u64 code path for ARM64EC.
570****************************************************************************************************/
571# if defined(__clang__) && (__clang_major__ < 10)
572 /* Avoid undefined clang-cl intrinsics issue.
573 * See https://github.com/lz4/lz4/pull/1017 for details. */
574 return (unsigned)__builtin_ia32_tzcnt_u64(val) >> 3;
575# else
576 /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */
577 return (unsigned)_tzcnt_u64(val) >> 3;
578# endif
579# elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
580 unsigned long r = 0;
581 _BitScanForward64(&r, (U64)val);
582 return (unsigned)r >> 3;
583# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
584 ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
585 !defined(LZ4_FORCE_SW_BITCOUNT)
586 return (unsigned)__builtin_ctzll((U64)val) >> 3;
587# else
588 const U64 m = 0x0101010101010101ULL;
589 val ^= val - 1;
590 return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56);
591# endif
592 } else /* 32 bits */ {
593# if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT)
594 unsigned long r;
595 _BitScanForward(&r, (U32)val);
596 return (unsigned)r >> 3;
597# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
598 ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
599 !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
600 return (unsigned)__builtin_ctz((U32)val) >> 3;
601# else
602 const U32 m = 0x01010101;
603 return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24;
604# endif
605 }
606 } else /* Big Endian CPU */ {
607 if (sizeof(val)==8) {
608# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
609 ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
610 !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
611 return (unsigned)__builtin_clzll((U64)val) >> 3;
612# else
613#if 1
614 /* this method is probably faster,
615 * but adds a 128 bytes lookup table */
616 static const unsigned char ctz7_tab[128] = {
617 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
618 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
619 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
620 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
621 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
622 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
623 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
624 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
625 };
626 U64 const mask = 0x0101010101010101ULL;
627 U64 const t = (((val >> 8) - mask) | val) & mask;
628 return ctz7_tab[(t * 0x0080402010080402ULL) >> 57];
629#else
630 /* this method doesn't consume memory space like the previous one,
631 * but it contains several branches,
632 * that may end up slowing execution */
633 static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 16 on 32 bits.
634 Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
635 Note that this code path is never triggered in 32-bits mode. */
636 unsigned r;
637 if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
638 if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
639 r += (!val);
640 return r;
641#endif
642# endif
643 } else /* 32 bits */ {
644# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
645 ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
646 !defined(LZ4_FORCE_SW_BITCOUNT)
647 return (unsigned)__builtin_clz((U32)val) >> 3;
648# else
649 val >>= 8;
650 val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) |
651 (val + 0x00FF0000)) >> 24;
652 return (unsigned)val ^ 3;
653# endif
654 }
655 }
656}
657
658
659#define STEPSIZE sizeof(reg_t)
660LZ4_FORCE_INLINE
661unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
662{
663 const BYTE* const pStart = pIn;
664
665 if (likely(pIn < pInLimit-(STEPSIZE-1))) {
666 reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
667 if (!diff) {
668 pIn+=STEPSIZE; pMatch+=STEPSIZE;
669 } else {
670 return LZ4_NbCommonBytes(diff);
671 } }
672
673 while (likely(pIn < pInLimit-(STEPSIZE-1))) {
674 reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
675 if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
676 pIn += LZ4_NbCommonBytes(diff);
677 return (unsigned)(pIn - pStart);
678 }
679
680 if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; }
681 if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; }
682 if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
683 return (unsigned)(pIn - pStart);
684}
685
686
687#ifndef LZ4_COMMONDEFS_ONLY
688/*-************************************
689* Local Constants
690**************************************/
691static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1));
692static const U32 LZ4_skipTrigger = 6; /* Increase this value ==> compression run slower on incompressible data */
693
694
695/*-************************************
696* Local Structures and types
697**************************************/
698typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;
699
700/**
701 * This enum distinguishes several different modes of accessing previous
702 * content in the stream.
703 *
704 * - noDict : There is no preceding content.
705 * - withPrefix64k : Table entries up to ctx->dictSize before the current blob
706 * blob being compressed are valid and refer to the preceding
707 * content (of length ctx->dictSize), which is available
708 * contiguously preceding in memory the content currently
709 * being compressed.
710 * - usingExtDict : Like withPrefix64k, but the preceding content is somewhere
711 * else in memory, starting at ctx->dictionary with length
712 * ctx->dictSize.
713 * - usingDictCtx : Everything concerning the preceding content is
714 * in a separate context, pointed to by ctx->dictCtx.
715 * ctx->dictionary, ctx->dictSize, and table entries
716 * in the current context that refer to positions
717 * preceding the beginning of the current compression are
718 * ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
719 * ->dictSize describe the location and size of the preceding
720 * content, and matches are found by looking in the ctx
721 * ->dictCtx->hashTable.
722 */
723typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
724typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
725
726
727/*-************************************
728* Local Utils
729**************************************/
730int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }
731const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; }
732int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); }
733int LZ4_sizeofState(void) { return sizeof(LZ4_stream_t); }
734
735
736/*-****************************************
737* Internal Definitions, used only in Tests
738*******************************************/
739
740int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize);
741
742int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
743 int compressedSize, int maxOutputSize,
744 const void* dictStart, size_t dictSize);
745int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest,
746 int compressedSize, int targetOutputSize, int dstCapacity,
747 const void* dictStart, size_t dictSize);
748
749/*-******************************
750* Compression functions
751********************************/
752LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
753{
754 if (tableType == byU16)
755 return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
756 else
757 return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
758}
759
760LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
761{
762 const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
763 if (LZ4_isLittleEndian()) {
764 const U64 prime5bytes = 889523592379ULL;
765 return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
766 } else {
767 const U64 prime8bytes = 11400714785074694791ULL;
768 return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
769 }
770}
771
772LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
773{
774 if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType);
775 return LZ4_hash4(LZ4_read32(p), tableType);
776}
777
778LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
779{
780 switch (tableType)
781 {
782 default: /* fallthrough */
783 case clearedTable: { /* illegal! */ assert(0); return; }
784 case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; }
785 case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; }
786 case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; }
787 }
788}
789
790LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
791{
792 switch (tableType)
793 {
794 default: /* fallthrough */
795 case clearedTable: /* fallthrough */
796 case byPtr: { /* illegal! */ assert(0); return; }
797 case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; }
798 case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; }
799 }
800}
801
802LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h,
803 void* tableBase, tableType_t const tableType,
804 const BYTE* srcBase)
805{
806 switch (tableType)
807 {
808 case clearedTable: { /* illegal! */ assert(0); return; }
809 case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; }
810 case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; }
811 case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; }
812 }
813}
814
815LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
816{
817 U32 const h = LZ4_hashPosition(p, tableType);
818 LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
819}
820
821/* LZ4_getIndexOnHash() :
822 * Index of match position registered in hash table.
823 * hash position must be calculated by using base+index, or dictBase+index.
824 * Assumption 1 : only valid if tableType == byU32 or byU16.
825 * Assumption 2 : h is presumed valid (within limits of hash table)
826 */
827LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
828{
829 LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
830 if (tableType == byU32) {
831 const U32* const hashTable = (const U32*) tableBase;
832 assert(h < (1U << (LZ4_MEMORY_USAGE-2)));
833 return hashTable[h];
834 }
835 if (tableType == byU16) {
836 const U16* const hashTable = (const U16*) tableBase;
837 assert(h < (1U << (LZ4_MEMORY_USAGE-1)));
838 return hashTable[h];
839 }
840 assert(0); return 0; /* forbidden case */
841}
842
843static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase)
844{
845 if (tableType == byPtr) { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; }
846 if (tableType == byU32) { const U32* const hashTable = (const U32*) tableBase; return hashTable[h] + srcBase; }
847 { const U16* const hashTable = (const U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */
848}
849
850LZ4_FORCE_INLINE const BYTE*
851LZ4_getPosition(const BYTE* p,
852 const void* tableBase, tableType_t tableType,
853 const BYTE* srcBase)
854{
855 U32 const h = LZ4_hashPosition(p, tableType);
856 return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
857}
858
859LZ4_FORCE_INLINE void
860LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
861 const int inputSize,
862 const tableType_t tableType) {
863 /* If the table hasn't been used, it's guaranteed to be zeroed out, and is
864 * therefore safe to use no matter what mode we're in. Otherwise, we figure
865 * out if it's safe to leave as is or whether it needs to be reset.
866 */
867 if ((tableType_t)cctx->tableType != clearedTable) {
868 assert(inputSize >= 0);
869 if ((tableType_t)cctx->tableType != tableType
870 || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU)
871 || ((tableType == byU32) && cctx->currentOffset > 1 GB)
872 || tableType == byPtr
873 || inputSize >= 4 KB)
874 {
875 DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
876 MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
877 cctx->currentOffset = 0;
878 cctx->tableType = (U32)clearedTable;
879 } else {
880 DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
881 }
882 }
883
884 /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back,
885 * is faster than compressing without a gap.
886 * However, compressing with currentOffset == 0 is faster still,
887 * so we preserve that case.
888 */
889 if (cctx->currentOffset != 0 && tableType == byU32) {
890 DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset");
891 cctx->currentOffset += 64 KB;
892 }
893
894 /* Finally, clear history */
895 cctx->dictCtx = NULL;
896 cctx->dictionary = NULL;
897 cctx->dictSize = 0;
898}
899
900/** LZ4_compress_generic() :
901 * inlined, to ensure branches are decided at compilation time.
902 * Presumed already validated at this stage:
903 * - source != NULL
904 * - inputSize > 0
905 */
906LZ4_FORCE_INLINE int LZ4_compress_generic_validated(
907 LZ4_stream_t_internal* const cctx,
908 const char* const source,
909 char* const dest,
910 const int inputSize,
911 int* inputConsumed, /* only written when outputDirective == fillOutput */
912 const int maxOutputSize,
913 const limitedOutput_directive outputDirective,
914 const tableType_t tableType,
915 const dict_directive dictDirective,
916 const dictIssue_directive dictIssue,
917 const int acceleration)
918{
919 int result;
920 const BYTE* ip = (const BYTE*) source;
921
922 U32 const startIndex = cctx->currentOffset;
923 const BYTE* base = (const BYTE*) source - startIndex;
924 const BYTE* lowLimit;
925
926 const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx;
927 const BYTE* const dictionary =
928 dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary;
929 const U32 dictSize =
930 dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize;
931 const U32 dictDelta = (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0; /* make indexes in dictCtx comparable with index in current context */
932
933 int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);
934 U32 const prefixIdxLimit = startIndex - dictSize; /* used when dictDirective == dictSmall */
935 const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary;
936 const BYTE* anchor = (const BYTE*) source;
937 const BYTE* const iend = ip + inputSize;
938 const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
939 const BYTE* const matchlimit = iend - LASTLITERALS;
940
941 /* the dictCtx currentOffset is indexed on the start of the dictionary,
942 * while a dictionary in the current context precedes the currentOffset */
943 const BYTE* dictBase = (dictionary == NULL) ? NULL :
944 (dictDirective == usingDictCtx) ?
945 dictionary + dictSize - dictCtx->currentOffset :
946 dictionary + dictSize - startIndex;
947
948 BYTE* op = (BYTE*) dest;
949 BYTE* const olimit = op + maxOutputSize;
950
951 U32 offset = 0;
952 U32 forwardH;
953
954 DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType);
955 assert(ip != NULL);
956 /* If init conditions are not met, we don't have to mark stream
957 * as having dirty context, since no action was taken yet */
958 if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */
959 if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; } /* Size too large (not within 64K limit) */
960 if (tableType==byPtr) assert(dictDirective==noDict); /* only supported use case with byPtr */
961 assert(acceleration >= 1);
962
963 lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0);
964
965 /* Update context state */
966 if (dictDirective == usingDictCtx) {
967 /* Subsequent linked blocks can't use the dictionary. */
968 /* Instead, they use the block we just compressed. */
969 cctx->dictCtx = NULL;
970 cctx->dictSize = (U32)inputSize;
971 } else {
972 cctx->dictSize += (U32)inputSize;
973 }
974 cctx->currentOffset += (U32)inputSize;
975 cctx->tableType = (U32)tableType;
976
977 if (inputSize<LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
978
979 /* First Byte */
980 LZ4_putPosition(ip, cctx->hashTable, tableType, base);
981 ip++; forwardH = LZ4_hashPosition(ip, tableType);
982
983 /* Main Loop */
984 for ( ; ; ) {
985 const BYTE* match;
986 BYTE* token;
987 const BYTE* filledIp;
988
989 /* Find a match */
990 if (tableType == byPtr) {
991 const BYTE* forwardIp = ip;
992 int step = 1;
993 int searchMatchNb = acceleration << LZ4_skipTrigger;
994 do {
995 U32 const h = forwardH;
996 ip = forwardIp;
997 forwardIp += step;
998 step = (searchMatchNb++ >> LZ4_skipTrigger);
999
1000 if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
1001 assert(ip < mflimitPlusOne);
1002
1003 match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base);
1004 forwardH = LZ4_hashPosition(forwardIp, tableType);
1005 LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base);
1006
1007 } while ( (match+LZ4_DISTANCE_MAX < ip)
1008 || (LZ4_read32(match) != LZ4_read32(ip)) );
1009
1010 } else { /* byU32, byU16 */
1011
1012 const BYTE* forwardIp = ip;
1013 int step = 1;
1014 int searchMatchNb = acceleration << LZ4_skipTrigger;
1015 do {
1016 U32 const h = forwardH;
1017 U32 const current = (U32)(forwardIp - base);
1018 U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
1019 assert(matchIndex <= current);
1020 assert(forwardIp - base < (ptrdiff_t)(2 GB - 1));
1021 ip = forwardIp;
1022 forwardIp += step;
1023 step = (searchMatchNb++ >> LZ4_skipTrigger);
1024
1025 if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
1026 assert(ip < mflimitPlusOne);
1027
1028 if (dictDirective == usingDictCtx) {
1029 if (matchIndex < startIndex) {
1030 /* there was no match, try the dictionary */
1031 assert(tableType == byU32);
1032 matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
1033 match = dictBase + matchIndex;
1034 matchIndex += dictDelta; /* make dictCtx index comparable with current context */
1035 lowLimit = dictionary;
1036 } else {
1037 match = base + matchIndex;
1038 lowLimit = (const BYTE*)source;
1039 }
1040 } else if (dictDirective == usingExtDict) {
1041 if (matchIndex < startIndex) {
1042 DEBUGLOG(7, "extDict candidate: matchIndex=%5u < startIndex=%5u", matchIndex, startIndex);
1043 assert(startIndex - matchIndex >= MINMATCH);
1044 assert(dictBase);
1045 match = dictBase + matchIndex;
1046 lowLimit = dictionary;
1047 } else {
1048 match = base + matchIndex;
1049 lowLimit = (const BYTE*)source;
1050 }
1051 } else { /* single continuous memory segment */
1052 match = base + matchIndex;
1053 }
1054 forwardH = LZ4_hashPosition(forwardIp, tableType);
1055 LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
1056
1057 DEBUGLOG(7, "candidate at pos=%u (offset=%u \n", matchIndex, current - matchIndex);
1058 if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; } /* match outside of valid area */
1059 assert(matchIndex < current);
1060 if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX))
1061 && (matchIndex+LZ4_DISTANCE_MAX < current)) {
1062 continue;
1063 } /* too far */
1064 assert((current - matchIndex) <= LZ4_DISTANCE_MAX); /* match now expected within distance */
1065
1066 if (LZ4_read32(match) == LZ4_read32(ip)) {
1067 if (maybe_extMem) offset = current - matchIndex;
1068 break; /* match found */
1069 }
1070
1071 } while(1);
1072 }
1073
1074 /* Catch up */
1075 filledIp = ip;
1076 while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
1077
1078 /* Encode Literals */
1079 { unsigned const litLength = (unsigned)(ip - anchor);
1080 token = op++;
1081 if ((outputDirective == limitedOutput) && /* Check output buffer overflow */
1082 (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) {
1083 return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
1084 }
1085 if ((outputDirective == fillOutput) &&
1086 (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
1087 op--;
1088 goto _last_literals;
1089 }
1090 if (litLength >= RUN_MASK) {
1091 int len = (int)(litLength - RUN_MASK);
1092 *token = (RUN_MASK<<ML_BITS);
1093 for(; len >= 255 ; len-=255) *op++ = 255;
1094 *op++ = (BYTE)len;
1095 }
1096 else *token = (BYTE)(litLength<<ML_BITS);
1097
1098 /* Copy Literals */
1099 LZ4_wildCopy8(op, anchor, op+litLength);
1100 op+=litLength;
1101 DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
1102 (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source));
1103 }
1104
1105_next_match:
1106 /* at this stage, the following variables must be correctly set :
1107 * - ip : at start of LZ operation
1108 * - match : at start of previous pattern occurrence; can be within current prefix, or within extDict
1109 * - offset : if maybe_ext_memSegment==1 (constant)
1110 * - lowLimit : must be == dictionary to mean "match is within extDict"; must be == source otherwise
1111 * - token and *token : position to write 4-bits for match length; higher 4-bits for literal length supposed already written
1112 */
1113
1114 if ((outputDirective == fillOutput) &&
1115 (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) {
1116 /* the match was too close to the end, rewind and go to last literals */
1117 op = token;
1118 goto _last_literals;
1119 }
1120
1121 /* Encode Offset */
1122 if (maybe_extMem) { /* static test */
1123 DEBUGLOG(6, " with offset=%u (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
1124 assert(offset <= LZ4_DISTANCE_MAX && offset > 0);
1125 LZ4_writeLE16(op, (U16)offset); op+=2;
1126 } else {
1127 DEBUGLOG(6, " with offset=%u (same segment)", (U32)(ip - match));
1128 assert(ip-match <= LZ4_DISTANCE_MAX);
1129 LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
1130 }
1131
1132 /* Encode MatchLength */
1133 { unsigned matchCode;
1134
1135 if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx)
1136 && (lowLimit==dictionary) /* match within extDict */ ) {
1137 const BYTE* limit = ip + (dictEnd-match);
1138 assert(dictEnd > match);
1139 if (limit > matchlimit) limit = matchlimit;
1140 matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
1141 ip += (size_t)matchCode + MINMATCH;
1142 if (ip==limit) {
1143 unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit);
1144 matchCode += more;
1145 ip += more;
1146 }
1147 DEBUGLOG(6, " with matchLength=%u starting in extDict", matchCode+MINMATCH);
1148 } else {
1149 matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
1150 ip += (size_t)matchCode + MINMATCH;
1151 DEBUGLOG(6, " with matchLength=%u", matchCode+MINMATCH);
1152 }
1153
1154 if ((outputDirective) && /* Check output buffer overflow */
1155 (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) {
1156 if (outputDirective == fillOutput) {
1157 /* Match description too long : reduce it */
1158 U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255;
1159 ip -= matchCode - newMatchCode;
1160 assert(newMatchCode < matchCode);
1161 matchCode = newMatchCode;
1162 if (unlikely(ip <= filledIp)) {
1163 /* We have already filled up to filledIp so if ip ends up less than filledIp
1164 * we have positions in the hash table beyond the current position. This is
1165 * a problem if we reuse the hash table. So we have to remove these positions
1166 * from the hash table.
1167 */
1168 const BYTE* ptr;
1169 DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip));
1170 for (ptr = ip; ptr <= filledIp; ++ptr) {
1171 U32 const h = LZ4_hashPosition(ptr, tableType);
1172 LZ4_clearHash(h, cctx->hashTable, tableType);
1173 }
1174 }
1175 } else {
1176 assert(outputDirective == limitedOutput);
1177 return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
1178 }
1179 }
1180 if (matchCode >= ML_MASK) {
1181 *token += ML_MASK;
1182 matchCode -= ML_MASK;
1183 LZ4_write32(op, 0xFFFFFFFF);
1184 while (matchCode >= 4*255) {
1185 op+=4;
1186 LZ4_write32(op, 0xFFFFFFFF);
1187 matchCode -= 4*255;
1188 }
1189 op += matchCode / 255;
1190 *op++ = (BYTE)(matchCode % 255);
1191 } else
1192 *token += (BYTE)(matchCode);
1193 }
1194 /* Ensure we have enough space for the last literals. */
1195 assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit));
1196
1197 anchor = ip;
1198
1199 /* Test end of chunk */
1200 if (ip >= mflimitPlusOne) break;
1201
1202 /* Fill table */
1203 LZ4_putPosition(ip-2, cctx->hashTable, tableType, base);
1204
1205 /* Test next position */
1206 if (tableType == byPtr) {
1207
1208 match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
1209 LZ4_putPosition(ip, cctx->hashTable, tableType, base);
1210 if ( (match+LZ4_DISTANCE_MAX >= ip)
1211 && (LZ4_read32(match) == LZ4_read32(ip)) )
1212 { token=op++; *token=0; goto _next_match; }
1213
1214 } else { /* byU32, byU16 */
1215
1216 U32 const h = LZ4_hashPosition(ip, tableType);
1217 U32 const current = (U32)(ip-base);
1218 U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
1219 assert(matchIndex < current);
1220 if (dictDirective == usingDictCtx) {
1221 if (matchIndex < startIndex) {
1222 /* there was no match, try the dictionary */
1223 matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
1224 match = dictBase + matchIndex;
1225 lowLimit = dictionary; /* required for match length counter */
1226 matchIndex += dictDelta;
1227 } else {
1228 match = base + matchIndex;
1229 lowLimit = (const BYTE*)source; /* required for match length counter */
1230 }
1231 } else if (dictDirective==usingExtDict) {
1232 if (matchIndex < startIndex) {
1233 assert(dictBase);
1234 match = dictBase + matchIndex;
1235 lowLimit = dictionary; /* required for match length counter */
1236 } else {
1237 match = base + matchIndex;
1238 lowLimit = (const BYTE*)source; /* required for match length counter */
1239 }
1240 } else { /* single memory segment */
1241 match = base + matchIndex;
1242 }
1243 LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
1244 assert(matchIndex < current);
1245 if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
1246 && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
1247 && (LZ4_read32(match) == LZ4_read32(ip)) ) {
1248 token=op++;
1249 *token=0;
1250 if (maybe_extMem) offset = current - matchIndex;
1251 DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
1252 (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source));
1253 goto _next_match;
1254 }
1255 }
1256
1257 /* Prepare next loop */
1258 forwardH = LZ4_hashPosition(++ip, tableType);
1259
1260 }
1261
1262_last_literals:
1263 /* Encode Last Literals */
1264 { size_t lastRun = (size_t)(iend - anchor);
1265 if ( (outputDirective) && /* Check output buffer overflow */
1266 (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) {
1267 if (outputDirective == fillOutput) {
1268 /* adapt lastRun to fill 'dst' */
1269 assert(olimit >= op);
1270 lastRun = (size_t)(olimit-op) - 1/*token*/;
1271 lastRun -= (lastRun + 256 - RUN_MASK) / 256; /*additional length tokens*/
1272 } else {
1273 assert(outputDirective == limitedOutput);
1274 return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
1275 }
1276 }
1277 DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun);
1278 if (lastRun >= RUN_MASK) {
1279 size_t accumulator = lastRun - RUN_MASK;
1280 *op++ = RUN_MASK << ML_BITS;
1281 for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
1282 *op++ = (BYTE) accumulator;
1283 } else {
1284 *op++ = (BYTE)(lastRun<<ML_BITS);
1285 }
1286 LZ4_memcpy(op, anchor, lastRun);
1287 ip = anchor + lastRun;
1288 op += lastRun;
1289 }
1290
1291 if (outputDirective == fillOutput) {
1292 *inputConsumed = (int) (((const char*)ip)-source);
1293 }
1294 result = (int)(((char*)op) - dest);
1295 assert(result > 0);
1296 DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result);
1297 return result;
1298}
1299
1300/** LZ4_compress_generic() :
1301 * inlined, to ensure branches are decided at compilation time;
1302 * takes care of src == (NULL, 0)
1303 * and forward the rest to LZ4_compress_generic_validated */
1304LZ4_FORCE_INLINE int LZ4_compress_generic(
1305 LZ4_stream_t_internal* const cctx,
1306 const char* const src,
1307 char* const dst,
1308 const int srcSize,
1309 int *inputConsumed, /* only written when outputDirective == fillOutput */
1310 const int dstCapacity,
1311 const limitedOutput_directive outputDirective,
1312 const tableType_t tableType,
1313 const dict_directive dictDirective,
1314 const dictIssue_directive dictIssue,
1315 const int acceleration)
1316{
1317 DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i",
1318 srcSize, dstCapacity);
1319
1320 if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; } /* Unsupported srcSize, too large (or negative) */
1321 if (srcSize == 0) { /* src == NULL supported if srcSize == 0 */
1322 if (outputDirective != notLimited && dstCapacity <= 0) return 0; /* no output, can't write anything */
1323 DEBUGLOG(5, "Generating an empty block");
1324 assert(outputDirective == notLimited || dstCapacity >= 1);
1325 assert(dst != NULL);
1326 dst[0] = 0;
1327 if (outputDirective == fillOutput) {
1328 assert (inputConsumed != NULL);
1329 *inputConsumed = 0;
1330 }
1331 return 1;
1332 }
1333 assert(src != NULL);
1334
1335 return LZ4_compress_generic_validated(cctx, src, dst, srcSize,
1336 inputConsumed, /* only written into if outputDirective == fillOutput */
1337 dstCapacity, outputDirective,
1338 tableType, dictDirective, dictIssue, acceleration);
1339}
1340
1341
1342int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
1343{
1344 LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse;
1345 assert(ctx != NULL);
1346 if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
1347 if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
1348 if (maxOutputSize >= LZ4_compressBound(inputSize)) {
1349 if (inputSize < LZ4_64Klimit) {
1350 return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
1351 } else {
1352 const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
1353 return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
1354 }
1355 } else {
1356 if (inputSize < LZ4_64Klimit) {
1357 return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
1358 } else {
1359 const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
1360 return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration);
1361 }
1362 }
1363}
1364
1365/**
1366 * LZ4_compress_fast_extState_fastReset() :
1367 * A variant of LZ4_compress_fast_extState().
1368 *
1369 * Using this variant avoids an expensive initialization step. It is only safe
1370 * to call if the state buffer is known to be correctly initialized already
1371 * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of
1372 * "correctly initialized").
1373 */
1374int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
1375{
1376 LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
1377 if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
1378 if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
1379
1380 if (dstCapacity >= LZ4_compressBound(srcSize)) {
1381 if (srcSize < LZ4_64Klimit) {
1382 const tableType_t tableType = byU16;
1383 LZ4_prepareTable(ctx, srcSize, tableType);
1384 if (ctx->currentOffset) {
1385 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration);
1386 } else {
1387 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
1388 }
1389 } else {
1390 const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
1391 LZ4_prepareTable(ctx, srcSize, tableType);
1392 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
1393 }
1394 } else {
1395 if (srcSize < LZ4_64Klimit) {
1396 const tableType_t tableType = byU16;
1397 LZ4_prepareTable(ctx, srcSize, tableType);
1398 if (ctx->currentOffset) {
1399 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration);
1400 } else {
1401 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
1402 }
1403 } else {
1404 const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
1405 LZ4_prepareTable(ctx, srcSize, tableType);
1406 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
1407 }
1408 }
1409}
1410
1411
1412int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
1413{
1414 int result;
1415#if (LZ4_HEAPMODE)
1416 LZ4_stream_t* ctxPtr = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */
1417 if (ctxPtr == NULL) return 0;
1418#else
1419 LZ4_stream_t ctx;
1420 LZ4_stream_t* const ctxPtr = &ctx;
1421#endif
1422 result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration);
1423
1424#if (LZ4_HEAPMODE)
1425 FREEMEM(ctxPtr);
1426#endif
1427 return result;
1428}
1429
1430
1431int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputSize)
1432{
1433 return LZ4_compress_fast(src, dst, srcSize, maxOutputSize, 1);
1434}
1435
1436
1437/* Note!: This function leaves the stream in an unclean/broken state!
1438 * It is not safe to subsequently use the same state with a _fastReset() or
1439 * _continue() call without resetting it. */
1440static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize)
1441{
1442 void* const s = LZ4_initStream(state, sizeof (*state));
1443 assert(s != NULL); (void)s;
1444
1445 if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) { /* compression success is guaranteed */
1446 return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1);
1447 } else {
1448 if (*srcSizePtr < LZ4_64Klimit) {
1449 return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1);
1450 } else {
1451 tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
1452 return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, 1);
1453 } }
1454}
1455
1456
1457int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
1458{
1459#if (LZ4_HEAPMODE)
1460 LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */
1461 if (ctx == NULL) return 0;
1462#else
1463 LZ4_stream_t ctxBody;
1464 LZ4_stream_t* ctx = &ctxBody;
1465#endif
1466
1467 int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize);
1468
1469#if (LZ4_HEAPMODE)
1470 FREEMEM(ctx);
1471#endif
1472 return result;
1473}
1474
1475
1476
1477/*-******************************
1478* Streaming functions
1479********************************/
1480
1481#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
1482LZ4_stream_t* LZ4_createStream(void)
1483{
1484 LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));
1485 LZ4_STATIC_ASSERT(sizeof(LZ4_stream_t) >= sizeof(LZ4_stream_t_internal));
1486 DEBUGLOG(4, "LZ4_createStream %p", lz4s);
1487 if (lz4s == NULL) return NULL;
1488 LZ4_initStream(lz4s, sizeof(*lz4s));
1489 return lz4s;
1490}
1491#endif
1492
1493static size_t LZ4_stream_t_alignment(void)
1494{
1495#if LZ4_ALIGN_TEST
1496 typedef struct { char c; LZ4_stream_t t; } t_a;
1497 return sizeof(t_a) - sizeof(LZ4_stream_t);
1498#else
1499 return 1; /* effectively disabled */
1500#endif
1501}
1502
1503LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
1504{
1505 DEBUGLOG(5, "LZ4_initStream");
1506 if (buffer == NULL) { return NULL; }
1507 if (size < sizeof(LZ4_stream_t)) { return NULL; }
1508 if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) return NULL;
1509 MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal));
1510 return (LZ4_stream_t*)buffer;
1511}
1512
1513/* resetStream is now deprecated,
1514 * prefer initStream() which is more general */
1515void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
1516{
1517 DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
1518 MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal));
1519}
1520
1521void LZ4_resetStream_fast(LZ4_stream_t* ctx) {
1522 LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32);
1523}
1524
1525#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
1526int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
1527{
1528 if (!LZ4_stream) return 0; /* support free on NULL */
1529 DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream);
1530 FREEMEM(LZ4_stream);
1531 return (0);
1532}
1533#endif
1534
1535
1536#define HASH_UNIT sizeof(reg_t)
1537int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
1538{
1539 LZ4_stream_t_internal* dict = &LZ4_dict->internal_donotuse;
1540 const tableType_t tableType = byU32;
1541 const BYTE* p = (const BYTE*)dictionary;
1542 const BYTE* const dictEnd = p + dictSize;
1543 const BYTE* base;
1544
1545 DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict);
1546
1547 /* It's necessary to reset the context,
1548 * and not just continue it with prepareTable()
1549 * to avoid any risk of generating overflowing matchIndex
1550 * when compressing using this dictionary */
1551 LZ4_resetStream(LZ4_dict);
1552
1553 /* We always increment the offset by 64 KB, since, if the dict is longer,
1554 * we truncate it to the last 64k, and if it's shorter, we still want to
1555 * advance by a whole window length so we can provide the guarantee that
1556 * there are only valid offsets in the window, which allows an optimization
1557 * in LZ4_compress_fast_continue() where it uses noDictIssue even when the
1558 * dictionary isn't a full 64k. */
1559 dict->currentOffset += 64 KB;
1560
1561 if (dictSize < (int)HASH_UNIT) {
1562 return 0;
1563 }
1564
1565 if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
1566 base = dictEnd - dict->currentOffset;
1567 dict->dictionary = p;
1568 dict->dictSize = (U32)(dictEnd - p);
1569 dict->tableType = (U32)tableType;
1570
1571 while (p <= dictEnd-HASH_UNIT) {
1572 LZ4_putPosition(p, dict->hashTable, tableType, base);
1573 p+=3;
1574 }
1575
1576 return (int)dict->dictSize;
1577}
1578
1579void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream)
1580{
1581 const LZ4_stream_t_internal* dictCtx = (dictionaryStream == NULL) ? NULL :
1582 &(dictionaryStream->internal_donotuse);
1583
1584 DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)",
1585 workingStream, dictionaryStream,
1586 dictCtx != NULL ? dictCtx->dictSize : 0);
1587
1588 if (dictCtx != NULL) {
1589 /* If the current offset is zero, we will never look in the
1590 * external dictionary context, since there is no value a table
1591 * entry can take that indicates a miss. In that case, we need
1592 * to bump the offset to something non-zero.
1593 */
1594 if (workingStream->internal_donotuse.currentOffset == 0) {
1595 workingStream->internal_donotuse.currentOffset = 64 KB;
1596 }
1597
1598 /* Don't actually attach an empty dictionary.
1599 */
1600 if (dictCtx->dictSize == 0) {
1601 dictCtx = NULL;
1602 }
1603 }
1604 workingStream->internal_donotuse.dictCtx = dictCtx;
1605}
1606
1607
1608static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
1609{
1610 assert(nextSize >= 0);
1611 if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) { /* potential ptrdiff_t overflow (32-bits mode) */
1612 /* rescale hash table */
1613 U32 const delta = LZ4_dict->currentOffset - 64 KB;
1614 const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
1615 int i;
1616 DEBUGLOG(4, "LZ4_renormDictT");
1617 for (i=0; i<LZ4_HASH_SIZE_U32; i++) {
1618 if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0;
1619 else LZ4_dict->hashTable[i] -= delta;
1620 }
1621 LZ4_dict->currentOffset = 64 KB;
1622 if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;
1623 LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
1624 }
1625}
1626
1627
1628int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
1629 const char* source, char* dest,
1630 int inputSize, int maxOutputSize,
1631 int acceleration)
1632{
1633 const tableType_t tableType = byU32;
1634 LZ4_stream_t_internal* const streamPtr = &LZ4_stream->internal_donotuse;
1635 const char* dictEnd = streamPtr->dictSize ? (const char*)streamPtr->dictionary + streamPtr->dictSize : NULL;
1636
1637 DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i, dictSize=%u)", inputSize, streamPtr->dictSize);
1638
1639 LZ4_renormDictT(streamPtr, inputSize); /* fix index overflow */
1640 if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
1641 if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
1642
1643 /* invalidate tiny dictionaries */
1644 if ( (streamPtr->dictSize < 4) /* tiny dictionary : not enough for a hash */
1645 && (dictEnd != source) /* prefix mode */
1646 && (inputSize > 0) /* tolerance : don't lose history, in case next invocation would use prefix mode */
1647 && (streamPtr->dictCtx == NULL) /* no dictCtx attached */
1648 ) {
1649 DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary);
1650 /* remove dictionary existence from history, to employ faster prefix mode */
1651 streamPtr->dictSize = 0;
1652 streamPtr->dictionary = (const BYTE*)source;
1653 dictEnd = source;
1654 }
1655
1656 /* Check overlapping input/dictionary space */
1657 { const char* const sourceEnd = source + inputSize;
1658 if ((sourceEnd > (const char*)streamPtr->dictionary) && (sourceEnd < dictEnd)) {
1659 streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
1660 if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;
1661 if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;
1662 streamPtr->dictionary = (const BYTE*)dictEnd - streamPtr->dictSize;
1663 }
1664 }
1665
1666 /* prefix mode : source data follows dictionary */
1667 if (dictEnd == source) {
1668 if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
1669 return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration);
1670 else
1671 return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration);
1672 }
1673
1674 /* external dictionary mode */
1675 { int result;
1676 if (streamPtr->dictCtx) {
1677 /* We depend here on the fact that dictCtx'es (produced by
1678 * LZ4_loadDict) guarantee that their tables contain no references
1679 * to offsets between dictCtx->currentOffset - 64 KB and
1680 * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe
1681 * to use noDictIssue even when the dict isn't a full 64 KB.
1682 */
1683 if (inputSize > 4 KB) {
1684 /* For compressing large blobs, it is faster to pay the setup
1685 * cost to copy the dictionary's tables into the active context,
1686 * so that the compression loop is only looking into one table.
1687 */
1688 LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr));
1689 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
1690 } else { /* small data (<= 4 KB) : look into dictCtx tables directly, no copy */
1691 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration);
1692 }
1693 } else { /* no dictCtx : rely on the stream's own dictionary, if any */
1694 if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
1695 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration);
1696 } else {
1697 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
1698 }
1699 }
1700 streamPtr->dictionary = (const BYTE*)source;
1701 streamPtr->dictSize = (U32)inputSize;
1702 return result;
1703 }
1704}
1705
1706
1707/* Hidden debug function, to force-test external dictionary mode */
1708int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize)
1709{
1710 LZ4_stream_t_internal* streamPtr = &LZ4_dict->internal_donotuse;
1711 int result;
1712
1713 LZ4_renormDictT(streamPtr, srcSize);
1714
1715 if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
1716 result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1);
1717 } else {
1718 result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
1719 }
1720
1721 streamPtr->dictionary = (const BYTE*)source;
1722 streamPtr->dictSize = (U32)srcSize;
1723
1724 return result;
1725}
1726
1727
1728/*! LZ4_saveDict() :
1729 * If the previously compressed data block is not guaranteed to remain available at its memory location,
1730 * save it into a safer place (char* safeBuffer).
1731 * Note : no need to call LZ4_loadDict() afterwards, dictionary is immediately usable,
1732 * one can therefore call LZ4_compress_fast_continue() right after.
1733 * @return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
1734 */
1735int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
1736{
1737 LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse;
1738
1739 DEBUGLOG(5, "LZ4_saveDict : dictSize=%i, safeBuffer=%p", dictSize, safeBuffer);
1740
1741 if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */
1742 if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; }
1743
1744 if (safeBuffer == NULL) assert(dictSize == 0);
1745 if (dictSize > 0) {
1746 const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize;
1747 assert(dict->dictionary);
1748 LZ4_memmove(safeBuffer, previousDictEnd - dictSize, (size_t)dictSize);
1749 }
1750
1751 dict->dictionary = (const BYTE*)safeBuffer;
1752 dict->dictSize = (U32)dictSize;
1753
1754 return dictSize;
1755}
1756
1757
1758
1759/*-*******************************
1760 * Decompression functions
1761 ********************************/
1762
1763typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;
1764
1765#undef MIN
1766#define MIN(a,b) ( (a) < (b) ? (a) : (b) )
1767
1768
1769/* variant for decompress_unsafe()
1770 * does not know end of input
1771 * presumes input is well formed
1772 * note : will consume at least one byte */
1773 static size_t read_long_length_no_check(const BYTE** pp)
1774{
1775 size_t b, l = 0;
1776 do { b = **pp; (*pp)++; l += b; } while (b==255);
1777 DEBUGLOG(6, "read_long_length_no_check: +length=%zu using %zu input bytes", l, l/255 + 1)
1778 return l;
1779}
1780
1781/* core decoder variant for LZ4_decompress_fast*()
1782 * for legacy support only : these entry points are deprecated.
1783 * - Presumes input is correctly formed (no defense vs malformed inputs)
1784 * - Does not know input size (presume input buffer is "large enough")
1785 * - Decompress a full block (only)
1786 * @return : nb of bytes read from input.
1787 * Note : this variant is not optimized for speed; it only exists for ease of maintenance.
1788 * The goal is to remove support of the decompress_fast*() variants by v2.0.
1789**/
1790LZ4_FORCE_INLINE int
1791LZ4_decompress_unsafe_generic(
1792 const BYTE* const istart,
1793 BYTE* const ostart,
1794 int decompressedSize,
1795
1796 size_t prefixSize,
1797 const BYTE* const dictStart, /* only if dict==usingExtDict */
1798 const size_t dictSize /* note: =0 if dictStart==NULL */
1799 )
1800{
1801 const BYTE* ip = istart;
1802 BYTE* op = (BYTE*)ostart;
1803 BYTE* const oend = ostart + decompressedSize;
1804 const BYTE* const prefixStart = ostart - prefixSize;
1805
1806 DEBUGLOG(5, "LZ4_decompress_unsafe_generic");
1807 if (dictStart == NULL) assert(dictSize == 0);
1808
1809 while (1) {
1810 /* start new sequence */
1811 unsigned token = *ip++;
1812
1813 /* literals */
1814 { size_t ll = token >> ML_BITS;
1815 if (ll==15) {
1816 /* long literal length */
1817 ll += read_long_length_no_check(&ip);
1818 }
1819 if ((size_t)(oend-op) < ll) return -1; /* output buffer overflow */
1820 LZ4_memmove(op, ip, ll); /* support in-place decompression */
1821 op += ll;
1822 ip += ll;
1823 if ((size_t)(oend-op) < MFLIMIT) {
1824 if (op==oend) break; /* end of block */
1825 DEBUGLOG(5, "invalid: literals end at distance %zi from end of block", oend-op);
1826 /* incorrect end of block :
1827 * last match must start at least MFLIMIT==12 bytes before end of output block */
1828 return -1;
1829 } }
1830
1831 /* match */
1832 { size_t ml = token & 15;
1833 size_t const offset = LZ4_readLE16(ip);
1834 ip+=2;
1835
1836 if (ml==15) {
1837 /* long match length */
1838 ml += read_long_length_no_check(&ip);
1839 }
1840 ml += MINMATCH;
1841
1842 if ((size_t)(oend-op) < ml) return -1; /* output buffer overflow */
1843
1844 { const BYTE* match = op - offset;
1845
1846 /* out of range */
1847 if (offset > (size_t)(op - prefixStart) + dictSize) {
1848 DEBUGLOG(6, "offset out of range");
1849 return -1;
1850 }
1851
1852 /* check special case : extDict */
1853 if (offset > (size_t)(op - prefixStart)) {
1854 /* extDict scenario */
1855 const BYTE* const dictEnd = dictStart + dictSize;
1856 const BYTE* extMatch = dictEnd - (offset - (size_t)(op-prefixStart));
1857 size_t const extml = (size_t)(dictEnd - extMatch);
1858 if (extml > ml) {
1859 /* match entirely within extDict */
1860 LZ4_memmove(op, extMatch, ml);
1861 op += ml;
1862 ml = 0;
1863 } else {
1864 /* match split between extDict & prefix */
1865 LZ4_memmove(op, extMatch, extml);
1866 op += extml;
1867 ml -= extml;
1868 }
1869 match = prefixStart;
1870 }
1871
1872 /* match copy - slow variant, supporting overlap copy */
1873 { size_t u;
1874 for (u=0; u<ml; u++) {
1875 op[u] = match[u];
1876 } } }
1877 op += ml;
1878 if ((size_t)(oend-op) < LASTLITERALS) {
1879 DEBUGLOG(5, "invalid: match ends at distance %zi from end of block", oend-op);
1880 /* incorrect end of block :
1881 * last match must stop at least LASTLITERALS==5 bytes before end of output block */
1882 return -1;
1883 }
1884 } /* match */
1885 } /* main loop */
1886 return (int)(ip - istart);
1887}
1888
1889
1890/* Read the variable-length literal or match length.
1891 *
1892 * @ip : input pointer
1893 * @ilimit : position beyond which, if the length is not yet fully decoded, the input is necessarily corrupted.
1894 * @initial_check : when non-zero, verify *ip < ilimit before entering the loop; returns rvl_error otherwise.
1895 * @return : the decoded length, or rvl_error on invalid input.
1896**/
1897typedef size_t Rvl_t;
1898static const Rvl_t rvl_error = (Rvl_t)(-1);
1899LZ4_FORCE_INLINE Rvl_t
1900read_variable_length(const BYTE** ip, const BYTE* ilimit,
1901 int initial_check)
1902{
1903 Rvl_t s, length = 0;
1904 assert(ip != NULL);
1905 assert(*ip != NULL);
1906 assert(ilimit != NULL);
1907 if (initial_check && unlikely((*ip) >= ilimit)) { /* read limit reached */
1908 return rvl_error;
1909 }
1910 do {
1911 s = **ip;
1912 (*ip)++;
1913 length += s;
1914 if (unlikely((*ip) > ilimit)) { /* read limit reached */
1915 return rvl_error;
1916 }
1917 /* accumulator overflow detection (32-bit mode only) */
1918 if ((sizeof(length)<8) && unlikely(length > ((Rvl_t)(-1)/2)) ) {
1919 return rvl_error;
1920 }
1921 } while (s==255);
1922
1923 return length;
1924}
1925
1926/*! LZ4_decompress_generic() :
1927 * This generic decompression function covers all use cases.
1928 * It shall be instantiated several times, using different sets of directives.
1929 * Note that it is important for performance that this function really get inlined,
1930 * in order to remove useless branches during compilation optimization.
1931 */
1932LZ4_FORCE_INLINE int
1933LZ4_decompress_generic(
1934 const char* const src,
1935 char* const dst,
1936 int srcSize,
1937 int outputSize, /* this is the size of the destination buffer, i.e. dstCapacity */
1938
1939 earlyEnd_directive partialDecoding, /* full, partial */
1940 dict_directive dict, /* noDict, withPrefix64k, usingExtDict */
1941 const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */
1942 const BYTE* const dictStart, /* only if dict==usingExtDict */
1943 const size_t dictSize /* note : = 0 if noDict */
1944 )
1945{
1946 if ((src == NULL) || (outputSize < 0)) { return -1; }
1947
1948 { const BYTE* ip = (const BYTE*) src;
1949 const BYTE* const iend = ip + srcSize;
1950
1951 BYTE* op = (BYTE*) dst;
1952 BYTE* const oend = op + outputSize;
1953 BYTE* cpy;
1954
1955 const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;
1956
1957 const int checkOffset = (dictSize < (int)(64 KB));
1958
1959
1960 /* Set up the "end" pointers for the shortcut. */
1961 const BYTE* const shortiend = iend - 14 /*maxLL*/ - 2 /*offset*/;
1962 const BYTE* const shortoend = oend - 14 /*maxLL*/ - 18 /*maxML*/;
1963
1964 const BYTE* match;
1965 size_t offset;
1966 unsigned token;
1967 size_t length;
1968
1969
1970 DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
1971
1972 /* Special cases */
1973 assert(lowPrefix <= op);
1974 if (unlikely(outputSize==0)) {
1975 /* Empty output buffer */
1976 if (partialDecoding) return 0;
1977 return ((srcSize==1) && (*ip==0)) ? 0 : -1;
1978 }
1979 if (unlikely(srcSize==0)) { return -1; }
1980
1981 /* LZ4_FAST_DEC_LOOP:
1982 * designed for modern OoO performance cpus,
1983 * where copying reliably 32-bytes is preferable to an unpredictable branch.
1984 * note : fast loop may show a regression for some client arm chips. */
1985#if LZ4_FAST_DEC_LOOP
1986 if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
1987 DEBUGLOG(6, "skip fast decode loop");
1988 goto safe_decode;
1989 }
1990
1991 /* Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE */
1992 while (1) {
1993 /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
1994 assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
1995 assert(ip < iend);
1996 token = *ip++;
1997 length = token >> ML_BITS; /* literal length */
1998
1999 /* decode literal length */
2000 if (length == RUN_MASK) {
2001 size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1);
2002 if (addl == rvl_error) { goto _output_error; }
2003 length += addl;
2004 if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
2005 if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
2006
2007 /* copy literals */
2008 cpy = op+length;
2009 LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
2010 if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
2011 LZ4_wildCopy32(op, ip, cpy);
2012 ip += length; op = cpy;
2013 } else {
2014 cpy = op+length;
2015 DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
2016 /* We don't need to check oend, since we check it once for each loop below */
2017 if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
2018 /* Literal length is at most 14 here, but copying 16 bytes lets compilers use register-sized copies */
2019 LZ4_memcpy(op, ip, 16);
2020 ip += length; op = cpy;
2021 }
2022
2023 /* get offset */
2024 offset = LZ4_readLE16(ip); ip+=2;
2025 match = op - offset;
2026 assert(match <= op); /* overflow check */
2027
2028 /* get matchlength */
2029 length = token & ML_MASK;
2030
2031 if (length == ML_MASK) {
2032 size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0);
2033 if (addl == rvl_error) { goto _output_error; }
2034 length += addl;
2035 length += MINMATCH;
2036 if (unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
2037 if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
2038 if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
2039 goto safe_match_copy;
2040 }
2041 } else {
2042 length += MINMATCH;
2043 if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
2044 goto safe_match_copy;
2045 }
2046
2047 /* Fastpath check: skip LZ4_wildCopy32 when true */
2048 if ((dict == withPrefix64k) || (match >= lowPrefix)) {
2049 if (offset >= 8) {
2050 assert(match >= lowPrefix);
2051 assert(match <= op);
2052 assert(op + 18 <= oend);
2053
2054 LZ4_memcpy(op, match, 8);
2055 LZ4_memcpy(op+8, match+8, 8);
2056 LZ4_memcpy(op+16, match+16, 2);
2057 op += length;
2058 continue;
2059 } } }
2060
2061 if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
2062 /* match starting within external dictionary */
2063 if ((dict==usingExtDict) && (match < lowPrefix)) {
2064 assert(dictEnd != NULL);
2065 if (unlikely(op+length > oend-LASTLITERALS)) {
2066 if (partialDecoding) {
2067 DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
2068 length = MIN(length, (size_t)(oend-op));
2069 } else {
2070 goto _output_error; /* end-of-block condition violated */
2071 } }
2072
2073 if (length <= (size_t)(lowPrefix-match)) {
2074 /* match fits entirely within external dictionary : just copy */
2075 LZ4_memmove(op, dictEnd - (lowPrefix-match), length);
2076 op += length;
2077 } else {
2078 /* match stretches into both external dictionary and current block */
2079 size_t const copySize = (size_t)(lowPrefix - match);
2080 size_t const restSize = length - copySize;
2081 LZ4_memcpy(op, dictEnd - copySize, copySize);
2082 op += copySize;
2083 if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
2084 BYTE* const endOfMatch = op + restSize;
2085 const BYTE* copyFrom = lowPrefix;
2086 while (op < endOfMatch) { *op++ = *copyFrom++; }
2087 } else {
2088 LZ4_memcpy(op, lowPrefix, restSize);
2089 op += restSize;
2090 } }
2091 continue;
2092 }
2093
2094 /* copy match within block */
2095 cpy = op + length;
2096
2097 assert((op <= oend) && (oend-op >= 32));
2098 if (unlikely(offset<16)) {
2099 LZ4_memcpy_using_offset(op, match, cpy, offset);
2100 } else {
2101 LZ4_wildCopy32(op, match, cpy);
2102 }
2103
2104 op = cpy; /* wildcopy correction */
2105 }
2106 safe_decode:
2107#endif
2108
2109 /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
2110 while (1) {
2111 assert(ip < iend);
2112 token = *ip++;
2113 length = token >> ML_BITS; /* literal length */
2114
2115 /* A two-stage shortcut for the most common case:
2116 * 1) If the literal length is 0..14, and there is enough space,
2117 * enter the shortcut and copy 16 bytes on behalf of the literals
2118 * (in the fast mode, only 8 bytes can be safely copied this way).
2119 * 2) Further if the match length is 4..18, copy 18 bytes in a similar
2120 * manner; but we ensure that there's enough space in the output for
2121 * those 18 bytes earlier, upon entering the shortcut (in other words,
2122 * there is a combined check for both stages).
2123 */
2124 if ( (length != RUN_MASK)
2125 /* strictly "less than" on input, to re-enter the loop with at least one byte */
2126 && likely((ip < shortiend) & (op <= shortoend)) ) {
2127 /* Copy the literals */
2128 LZ4_memcpy(op, ip, 16);
2129 op += length; ip += length;
2130
2131 /* The second stage: prepare for match copying, decode full info.
2132 * If it doesn't work out, the info won't be wasted. */
2133 length = token & ML_MASK; /* match length */
2134 offset = LZ4_readLE16(ip); ip += 2;
2135 match = op - offset;
2136 assert(match <= op); /* check overflow */
2137
2138 /* Do not deal with overlapping matches. */
2139 if ( (length != ML_MASK)
2140 && (offset >= 8)
2141 && (dict==withPrefix64k || match >= lowPrefix) ) {
2142 /* Copy the match. */
2143 LZ4_memcpy(op + 0, match + 0, 8);
2144 LZ4_memcpy(op + 8, match + 8, 8);
2145 LZ4_memcpy(op +16, match +16, 2);
2146 op += length + MINMATCH;
2147 /* Both stages worked, load the next token. */
2148 continue;
2149 }
2150
2151 /* The second stage didn't work out, but the info is ready.
2152 * Propel it right to the point of match copying. */
2153 goto _copy_match;
2154 }
2155
2156 /* decode literal length */
2157 if (length == RUN_MASK) {
2158 size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1);
2159 if (addl == rvl_error) { goto _output_error; }
2160 length += addl;
2161 if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
2162 if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
2163 }
2164
2165 /* copy literals */
2166 cpy = op+length;
2167#if LZ4_FAST_DEC_LOOP
2168 safe_literal_copy:
2169#endif
2170 LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
2171 if ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) {
2172 /* We've either hit the input parsing restriction or the output parsing restriction.
2173 * In the normal scenario, decoding a full block, it must be the last sequence,
2174 * otherwise it's an error (invalid input or dimensions).
2175 * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
2176 */
2177 if (partialDecoding) {
2178 /* Since we are decoding partially, we may have reached this block only because of the output-size
2179 * restriction, which does not apply here : an undersized output buffer is allowed.
2180 */
2181 DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
2182 DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
2183 DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
2184 DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
2185 /* Finishing in the middle of a literals segment,
2186 * due to lack of input.
2187 */
2188 if (ip+length > iend) {
2189 length = (size_t)(iend-ip);
2190 cpy = op + length;
2191 }
2192 /* Finishing in the middle of a literals segment,
2193 * due to lack of output space.
2194 */
2195 if (cpy > oend) {
2196 cpy = oend;
2197 assert(op<=oend);
2198 length = (size_t)(oend-op);
2199 }
2200 } else {
2201 /* We must be on the last sequence (or invalid) because of the parsing limitations
2202 * so check that we exactly consume the input and don't overrun the output buffer.
2203 */
2204 if ((ip+length != iend) || (cpy > oend)) {
2205 DEBUGLOG(6, "should have been last run of literals")
2206 DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
2207 DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
2208 goto _output_error;
2209 }
2210 }
2211 LZ4_memmove(op, ip, length); /* supports overlapping memory regions, for in-place decompression scenarios */
2212 ip += length;
2213 op += length;
2214 /* Necessarily EOF when !partialDecoding.
2215 * When partialDecoding, it is EOF if we've either
2216 * filled the output buffer or
2217 * can't proceed with reading an offset for following match.
2218 */
2219 if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
2220 break;
2221 }
2222 } else {
2223 LZ4_wildCopy8(op, ip, cpy); /* can overwrite up to 8 bytes beyond cpy */
2224 ip += length; op = cpy;
2225 }
2226
2227 /* get offset */
2228 offset = LZ4_readLE16(ip); ip+=2;
2229 match = op - offset;
2230
2231 /* get matchlength */
2232 length = token & ML_MASK;
2233
2234 _copy_match:
2235 if (length == ML_MASK) {
2236 size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0);
2237 if (addl == rvl_error) { goto _output_error; }
2238 length += addl;
2239 if (unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */
2240 }
2241 length += MINMATCH;
2242
2243#if LZ4_FAST_DEC_LOOP
2244 safe_match_copy:
2245#endif
2246 if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */
2247 /* match starting within external dictionary */
2248 if ((dict==usingExtDict) && (match < lowPrefix)) {
2249 assert(dictEnd != NULL);
2250 if (unlikely(op+length > oend-LASTLITERALS)) {
2251 if (partialDecoding) length = MIN(length, (size_t)(oend-op));
2252 else goto _output_error; /* doesn't respect parsing restriction */
2253 }
2254
2255 if (length <= (size_t)(lowPrefix-match)) {
2256 /* match fits entirely within external dictionary : just copy */
2257 LZ4_memmove(op, dictEnd - (lowPrefix-match), length);
2258 op += length;
2259 } else {
2260 /* match stretches into both external dictionary and current block */
2261 size_t const copySize = (size_t)(lowPrefix - match);
2262 size_t const restSize = length - copySize;
2263 LZ4_memcpy(op, dictEnd - copySize, copySize);
2264 op += copySize;
2265 if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
2266 BYTE* const endOfMatch = op + restSize;
2267 const BYTE* copyFrom = lowPrefix;
2268 while (op < endOfMatch) *op++ = *copyFrom++;
2269 } else {
2270 LZ4_memcpy(op, lowPrefix, restSize);
2271 op += restSize;
2272 } }
2273 continue;
2274 }
2275 assert(match >= lowPrefix);
2276
2277 /* copy match within block */
2278 cpy = op + length;
2279
2280 /* partialDecoding : may end anywhere within the block */
2281 assert(op<=oend);
2282 if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
2283 size_t const mlen = MIN(length, (size_t)(oend-op));
2284 const BYTE* const matchEnd = match + mlen;
2285 BYTE* const copyEnd = op + mlen;
2286 if (matchEnd > op) { /* overlap copy */
2287 while (op < copyEnd) { *op++ = *match++; }
2288 } else {
2289 LZ4_memcpy(op, match, mlen);
2290 }
2291 op = copyEnd;
2292 if (op == oend) { break; }
2293 continue;
2294 }
2295
2296 if (unlikely(offset<8)) {
2297 LZ4_write32(op, 0); /* silence msan warning when offset==0 */
2298 op[0] = match[0];
2299 op[1] = match[1];
2300 op[2] = match[2];
2301 op[3] = match[3];
2302 match += inc32table[offset];
2303 LZ4_memcpy(op+4, match, 4);
2304 match -= dec64table[offset];
2305 } else {
2306 LZ4_memcpy(op, match, 8);
2307 match += 8;
2308 }
2309 op += 8;
2310
2311 if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
2312 BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
2313 if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
2314 if (op < oCopyLimit) {
2315 LZ4_wildCopy8(op, match, oCopyLimit);
2316 match += oCopyLimit - op;
2317 op = oCopyLimit;
2318 }
2319 while (op < cpy) { *op++ = *match++; }
2320 } else {
2321 LZ4_memcpy(op, match, 8);
2322 if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); }
2323 }
2324 op = cpy; /* wildcopy correction */
2325 }
2326
2327 /* end of decoding */
2328 DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
2329 return (int) (((char*)op)-dst); /* Nb of output bytes decoded */
2330
2331 /* Overflow error detected */
2332 _output_error:
2333 return (int) (-(((const char*)ip)-src))-1;
2334 }
2335}
2336
2337
2338/*===== Instantiate the API decoding functions. =====*/
2339
2340LZ4_FORCE_O2
2341int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
2342{
2343 return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
2344 decode_full_block, noDict,
2345 (BYTE*)dest, NULL, 0);
2346}
2347
2348LZ4_FORCE_O2
2349int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity)
2350{
2351 dstCapacity = MIN(targetOutputSize, dstCapacity);
2352 return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity,
2353 partial_decode,
2354 noDict, (BYTE*)dst, NULL, 0);
2355}
2356
2357LZ4_FORCE_O2
2358int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
2359{
2360 DEBUGLOG(5, "LZ4_decompress_fast");
2361 return LZ4_decompress_unsafe_generic(
2362 (const BYTE*)source, (BYTE*)dest, originalSize,
2363 0, NULL, 0);
2364}
2365
2366/*===== Instantiate a few more decoding cases, used more than once. =====*/
2367
2368LZ4_FORCE_O2 /* Exported, an obsolete API function. */
2369int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
2370{
2371 return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
2372 decode_full_block, withPrefix64k,
2373 (BYTE*)dest - 64 KB, NULL, 0);
2374}
2375
2376LZ4_FORCE_O2
2377static int LZ4_decompress_safe_partial_withPrefix64k(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity)
2378{
2379 dstCapacity = MIN(targetOutputSize, dstCapacity);
2380 return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity,
2381 partial_decode, withPrefix64k,
2382 (BYTE*)dest - 64 KB, NULL, 0);
2383}
2384
2385/* Another obsolete API function, paired with the previous one. */
2386int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
2387{
2388 return LZ4_decompress_unsafe_generic(
2389 (const BYTE*)source, (BYTE*)dest, originalSize,
2390 64 KB, NULL, 0);
2391}
2392
2393LZ4_FORCE_O2
2394static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
2395 size_t prefixSize)
2396{
2397 return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
2398 decode_full_block, noDict,
2399 (BYTE*)dest-prefixSize, NULL, 0);
2400}
2401
2402LZ4_FORCE_O2
2403static int LZ4_decompress_safe_partial_withSmallPrefix(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity,
2404 size_t prefixSize)
2405{
2406 dstCapacity = MIN(targetOutputSize, dstCapacity);
2407 return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity,
2408 partial_decode, noDict,
2409 (BYTE*)dest-prefixSize, NULL, 0);
2410}
2411
2412LZ4_FORCE_O2
2413int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
2414 int compressedSize, int maxOutputSize,
2415 const void* dictStart, size_t dictSize)
2416{
2417 return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
2418 decode_full_block, usingExtDict,
2419 (BYTE*)dest, (const BYTE*)dictStart, dictSize);
2420}
2421
2422LZ4_FORCE_O2
2423int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest,
2424 int compressedSize, int targetOutputSize, int dstCapacity,
2425 const void* dictStart, size_t dictSize)
2426{
2427 dstCapacity = MIN(targetOutputSize, dstCapacity);
2428 return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity,
2429 partial_decode, usingExtDict,
2430 (BYTE*)dest, (const BYTE*)dictStart, dictSize);
2431}
2432
2433LZ4_FORCE_O2
2434static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
2435 const void* dictStart, size_t dictSize)
2436{
2437 return LZ4_decompress_unsafe_generic(
2438 (const BYTE*)source, (BYTE*)dest, originalSize,
2439 0, (const BYTE*)dictStart, dictSize);
2440}
2441
2442/* The "double dictionary" mode, for use with e.g. ring buffers: the first part
2443 * of the dictionary is passed as prefix, and the second via dictStart + dictSize.
2444 * These routines are used only once, in LZ4_decompress_*_continue().
2445 */
2446LZ4_FORCE_INLINE
2447int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize,
2448 size_t prefixSize, const void* dictStart, size_t dictSize)
2449{
2450 return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
2451 decode_full_block, usingExtDict,
2452 (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize);
2453}
2454
2455/*===== streaming decompression functions =====*/
2456
2457#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
2458LZ4_streamDecode_t* LZ4_createStreamDecode(void)
2459{
2460 LZ4_STATIC_ASSERT(sizeof(LZ4_streamDecode_t) >= sizeof(LZ4_streamDecode_t_internal));
2461 return (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t));
2462}
2463
2464int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
2465{
2466 if (LZ4_stream == NULL) { return 0; } /* support free on NULL */
2467 FREEMEM(LZ4_stream);
2468 return 0;
2469}
2470#endif
2471
2472/*! LZ4_setStreamDecode() :
2473 * Use this function to instruct the decoder where to find the dictionary.
2474 * This function is not necessary if previous data is still available where it was decoded.
2475 * Loading a size of 0 is allowed (same effect as no dictionary).
2476 * @return : 1 if OK, 0 if error
2477 */
2478int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
2479{
2480 LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
2481 lz4sd->prefixSize = (size_t)dictSize;
2482 if (dictSize) {
2483 assert(dictionary != NULL);
2484 lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize;
2485 } else {
2486 lz4sd->prefixEnd = (const BYTE*) dictionary;
2487 }
2488 lz4sd->externalDict = NULL;
2489 lz4sd->extDictSize = 0;
2490 return 1;
2491}
2492
2493/*! LZ4_decoderRingBufferSize() :
2494 * when setting a ring buffer for streaming decompression (optional scenario),
2495 * provides the minimum size of this ring buffer
2496 * to be compatible with any source respecting maxBlockSize condition.
2497 * Note : in a ring buffer scenario,
2498 * blocks are presumed decompressed next to each other.
2499 * When not enough space remains for next block (remainingSize < maxBlockSize),
2500 * decoding resumes from beginning of ring buffer.
2501 * @return : minimum ring buffer size,
2502 * or 0 if there is an error (invalid maxBlockSize).
2503 */
2504int LZ4_decoderRingBufferSize(int maxBlockSize)
2505{
2506 if (maxBlockSize < 0) return 0;
2507 if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0;
2508 if (maxBlockSize < 16) maxBlockSize = 16;
2509 return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize);
2510}
2511
2512/*
2513*_continue() :
2514 These decoding functions allow decompression of multiple blocks in "streaming" mode.
2515 Previously decoded blocks must still be available at the memory position where they were decoded.
2516 If that's not possible, save the relevant part of the decoded data into a safe buffer,
2517 and indicate where it is located using LZ4_setStreamDecode()
2518*/
2519LZ4_FORCE_O2
2520int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
2521{
2522 LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
2523 int result;
2524
2525 if (lz4sd->prefixSize == 0) {
2526 /* The first call, no dictionary yet. */
2527 assert(lz4sd->extDictSize == 0);
2528 result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
2529 if (result <= 0) return result;
2530 lz4sd->prefixSize = (size_t)result;
2531 lz4sd->prefixEnd = (BYTE*)dest + result;
2532 } else if (lz4sd->prefixEnd == (BYTE*)dest) {
2533 /* They're rolling the current segment. */
2534 if (lz4sd->prefixSize >= 64 KB - 1)
2535 result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
2536 else if (lz4sd->extDictSize == 0)
2537 result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize,
2538 lz4sd->prefixSize);
2539 else
2540 result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize,
2541 lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
2542 if (result <= 0) return result;
2543 lz4sd->prefixSize += (size_t)result;
2544 lz4sd->prefixEnd += result;
2545 } else {
2546 /* The buffer wraps around, or they're switching to another buffer. */
2547 lz4sd->extDictSize = lz4sd->prefixSize;
2548 lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
2549 result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize,
2550 lz4sd->externalDict, lz4sd->extDictSize);
2551 if (result <= 0) return result;
2552 lz4sd->prefixSize = (size_t)result;
2553 lz4sd->prefixEnd = (BYTE*)dest + result;
2554 }
2555
2556 return result;
2557}
2558
2559LZ4_FORCE_O2 int
2560LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode,
2561 const char* source, char* dest, int originalSize)
2562{
2563 LZ4_streamDecode_t_internal* const lz4sd =
2564 (assert(LZ4_streamDecode!=NULL), &LZ4_streamDecode->internal_donotuse);
2565 int result;
2566
2567 DEBUGLOG(5, "LZ4_decompress_fast_continue (toDecodeSize=%i)", originalSize);
2568 assert(originalSize >= 0);
2569
2570 if (lz4sd->prefixSize == 0) {
2571 DEBUGLOG(5, "first invocation : no prefix nor extDict");
2572 assert(lz4sd->extDictSize == 0);
2573 result = LZ4_decompress_fast(source, dest, originalSize);
2574 if (result <= 0) return result;
2575 lz4sd->prefixSize = (size_t)originalSize;
2576 lz4sd->prefixEnd = (BYTE*)dest + originalSize;
2577 } else if (lz4sd->prefixEnd == (BYTE*)dest) {
2578 DEBUGLOG(5, "continue using existing prefix");
2579 result = LZ4_decompress_unsafe_generic(
2580 (const BYTE*)source, (BYTE*)dest, originalSize,
2581 lz4sd->prefixSize,
2582 lz4sd->externalDict, lz4sd->extDictSize);
2583 if (result <= 0) return result;
2584 lz4sd->prefixSize += (size_t)originalSize;
2585 lz4sd->prefixEnd += originalSize;
2586 } else {
2587 DEBUGLOG(5, "prefix becomes extDict");
2588 lz4sd->extDictSize = lz4sd->prefixSize;
2589 lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
2590 result = LZ4_decompress_fast_extDict(source, dest, originalSize,
2591 lz4sd->externalDict, lz4sd->extDictSize);
2592 if (result <= 0) return result;
2593 lz4sd->prefixSize = (size_t)originalSize;
2594 lz4sd->prefixEnd = (BYTE*)dest + originalSize;
2595 }
2596
2597 return result;
2598}
2599
2600
2601/*
2602Advanced decoding functions :
2603*_usingDict() :
2604 These decoding functions work the same as the "_continue" ones,
2605 except that the dictionary must be explicitly provided as a parameter
2606*/
2607
2608int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
2609{
2610 if (dictSize==0)
2611 return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
2612 if (dictStart+dictSize == dest) {
2613 if (dictSize >= 64 KB - 1) {
2614 return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
2615 }
2616 assert(dictSize >= 0);
2617 return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize);
2618 }
2619 assert(dictSize >= 0);
2620 return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize);
2621}
2622
2623int LZ4_decompress_safe_partial_usingDict(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity, const char* dictStart, int dictSize)
2624{
2625 if (dictSize==0)
2626 return LZ4_decompress_safe_partial(source, dest, compressedSize, targetOutputSize, dstCapacity);
2627 if (dictStart+dictSize == dest) {
2628 if (dictSize >= 64 KB - 1) {
2629 return LZ4_decompress_safe_partial_withPrefix64k(source, dest, compressedSize, targetOutputSize, dstCapacity);
2630 }
2631 assert(dictSize >= 0);
2632 return LZ4_decompress_safe_partial_withSmallPrefix(source, dest, compressedSize, targetOutputSize, dstCapacity, (size_t)dictSize);
2633 }
2634 assert(dictSize >= 0);
2635 return LZ4_decompress_safe_partial_forceExtDict(source, dest, compressedSize, targetOutputSize, dstCapacity, dictStart, (size_t)dictSize);
2636}
2637
2638int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
2639{
2640 if (dictSize==0 || dictStart+dictSize == dest)
2641 return LZ4_decompress_unsafe_generic(
2642 (const BYTE*)source, (BYTE*)dest, originalSize,
2643 (size_t)dictSize, NULL, 0);
2644 assert(dictSize >= 0);
2645 return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize);
2646}
2647
2648
2649/*=*************************************************
2650* Obsolete Functions
2651***************************************************/
2652/* obsolete compression functions */
2653int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
2654{
2655 return LZ4_compress_default(source, dest, inputSize, maxOutputSize);
2656}
2657int LZ4_compress(const char* src, char* dest, int srcSize)
2658{
2659 return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize));
2660}
2661int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize)
2662{
2663 return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1);
2664}
2665int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize)
2666{
2667 return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1);
2668}
2669int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity)
2670{
2671 return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1);
2672}
2673int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize)
2674{
2675 return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1);
2676}
2677
2678/*
2679These decompression functions are deprecated and should no longer be used.
2680They are only provided here for compatibility with older user programs.
2681- LZ4_uncompress is totally equivalent to LZ4_decompress_fast
2682- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
2683*/
2684int LZ4_uncompress (const char* source, char* dest, int outputSize)
2685{
2686 return LZ4_decompress_fast(source, dest, outputSize);
2687}
2688int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize)
2689{
2690 return LZ4_decompress_safe(source, dest, isize, maxOutputSize);
2691}
2692
2693/* Obsolete Streaming functions */
2694
2695int LZ4_sizeofStreamState(void) { return sizeof(LZ4_stream_t); }
2696
2697int LZ4_resetStreamState(void* state, char* inputBuffer)
2698{
2699 (void)inputBuffer;
2700 LZ4_resetStream((LZ4_stream_t*)state);
2701 return 0;
2702}
2703
2704#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
2705void* LZ4_create (char* inputBuffer)
2706{
2707 (void)inputBuffer;
2708 return LZ4_createStream();
2709}
2710#endif
2711
2712char* LZ4_slideInputBuffer (void* state)
2713{
2714 /* avoid const char * -> char * conversion warning */
2715 return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary;
2716}
2717
2718#endif /* LZ4_COMMONDEFS_ONLY */
2719
2720}