Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'zstd-linus-v6.15-rc1' of https://github.com/terrelln/linux

Pull zstd updates from Nick Terrell:
"Update zstd to the latest upstream release v1.5.7.

The two major motivations for updating Zstandard are to keep the code
up to date, and to expose APIs needed by Intel for the QAT
compression accelerator.

Imported cleanly from the upstream tag v1.5.7-kernel, which is signed
by upstream's signing key EF8FE99528B52FFD"

Link: https://github.com/facebook/zstd/releases/tag/v1.5.7
Link: https://github.com/facebook/zstd/releases/tag/v1.5.7-kernel
Link: https://keyserver.ubuntu.com/pks/lookup?search=EF8FE99528B52FFD&fingerprint=on&op=index

* tag 'zstd-linus-v6.15-rc1' of https://github.com/terrelln/linux:
zstd: Import upstream v1.5.7

+9040 -4672
+82 -5
include/linux/zstd.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 2 2 /* 3 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 4 4 * All rights reserved. 5 5 * 6 6 * This source code is licensed under both the BSD-style license (found in the ··· 160 160 zstd_parameters zstd_get_params(int level, 161 161 unsigned long long estimated_src_size); 162 162 163 - 164 163 /** 165 164 * zstd_get_cparams() - returns zstd_compression_parameters for selected level 166 165 * @level: The compression level ··· 172 173 zstd_compression_parameters zstd_get_cparams(int level, 173 174 unsigned long long estimated_src_size, size_t dict_size); 174 175 175 - /* ====== Single-pass Compression ====== */ 176 - 177 176 typedef ZSTD_CCtx zstd_cctx; 177 + typedef ZSTD_cParameter zstd_cparameter; 178 + 179 + /** 180 + * zstd_cctx_set_param() - sets a compression parameter 181 + * @cctx: The context. Must have been initialized with zstd_init_cctx(). 182 + * @param: The parameter to set. 183 + * @value: The value to set the parameter to. 184 + * 185 + * Return: Zero or an error, which can be checked using zstd_is_error(). 186 + */ 187 + size_t zstd_cctx_set_param(zstd_cctx *cctx, zstd_cparameter param, int value); 188 + 189 + /* ====== Single-pass Compression ====== */ 178 190 179 191 /** 180 192 * zstd_cctx_workspace_bound() - max memory needed to initialize a zstd_cctx ··· 199 189 * zstd_init_cctx(). 200 190 */ 201 191 size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *parameters); 192 + 193 + /** 194 + * zstd_cctx_workspace_bound_with_ext_seq_prod() - max memory needed to 195 + * initialize a zstd_cctx when using the block-level external sequence 196 + * producer API. 197 + * @parameters: The compression parameters to be used. 198 + * 199 + * If multiple compression parameters might be used, the caller must call 200 + * this function for each set of parameters and use the maximum size. 
201 + * 202 + * Return: A lower bound on the size of the workspace that is passed to 203 + * zstd_init_cctx(). 204 + */ 205 + size_t zstd_cctx_workspace_bound_with_ext_seq_prod(const zstd_compression_parameters *parameters); 202 206 203 207 /** 204 208 * zstd_init_cctx() - initialize a zstd compression context ··· 449 425 size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams); 450 426 451 427 /** 428 + * zstd_cstream_workspace_bound_with_ext_seq_prod() - memory needed to initialize 429 + * a zstd_cstream when using the block-level external sequence producer API. 430 + * @cparams: The compression parameters to be used for compression. 431 + * 432 + * Return: A lower bound on the size of the workspace that is passed to 433 + * zstd_init_cstream(). 434 + */ 435 + size_t zstd_cstream_workspace_bound_with_ext_seq_prod(const zstd_compression_parameters *cparams); 436 + 437 + /** 452 438 * zstd_init_cstream() - initialize a zstd streaming compression context 453 439 * @parameters The zstd parameters to use for compression. 454 440 * @pledged_src_size: If params.fParams.contentSizeFlag == 1 then the caller ··· 618 584 size_t zstd_find_frame_compressed_size(const void *src, size_t src_size); 619 585 620 586 /** 587 + * zstd_register_sequence_producer() - exposes the zstd library function 588 + * ZSTD_registerSequenceProducer(). This is used for the block-level external 589 + * sequence producer API. See upstream zstd.h for detailed documentation. 590 + */ 591 + typedef ZSTD_sequenceProducer_F zstd_sequence_producer_f; 592 + void zstd_register_sequence_producer( 593 + zstd_cctx *cctx, 594 + void* sequence_producer_state, 595 + zstd_sequence_producer_f sequence_producer 596 + ); 597 + 598 + /** 621 599 * struct zstd_frame_params - zstd frame parameters stored in the frame header 622 600 * @frameContentSize: The frame content size, or ZSTD_CONTENTSIZE_UNKNOWN if not 623 601 * present. ··· 642 596 * 643 597 * See zstd_lib.h. 
644 598 */ 645 - typedef ZSTD_frameHeader zstd_frame_header; 599 + typedef ZSTD_FrameHeader zstd_frame_header; 646 600 647 601 /** 648 602 * zstd_get_frame_header() - extracts parameters from a zstd or skippable frame ··· 656 610 */ 657 611 size_t zstd_get_frame_header(zstd_frame_header *params, const void *src, 658 612 size_t src_size); 613 + 614 + /** 615 + * struct zstd_sequence - a sequence of literals or a match 616 + * 617 + * @offset: The offset of the match 618 + * @litLength: The literal length of the sequence 619 + * @matchLength: The match length of the sequence 620 + * @rep: Represents which repeat offset is used 621 + */ 622 + typedef ZSTD_Sequence zstd_sequence; 623 + 624 + /** 625 + * zstd_compress_sequences_and_literals() - compress an array of zstd_sequence and literals 626 + * 627 + * @cctx: The zstd compression context. 628 + * @dst: The buffer to compress the data into. 629 + * @dst_capacity: The size of the destination buffer. 630 + * @in_seqs: The array of zstd_sequence to compress. 631 + * @in_seqs_size: The number of sequences in in_seqs. 632 + * @literals: The literals associated to the sequences to be compressed. 633 + * @lit_size: The size of the literals in the literals buffer. 634 + * @lit_capacity: The size of the literals buffer. 635 + * @decompressed_size: The size of the input data 636 + * 637 + * Return: The compressed size or an error, which can be checked using 638 + * zstd_is_error(). 639 + */ 640 + size_t zstd_compress_sequences_and_literals(zstd_cctx *cctx, void* dst, size_t dst_capacity, 641 + const zstd_sequence *in_seqs, size_t in_seqs_size, 642 + const void* literals, size_t lit_size, size_t lit_capacity, 643 + size_t decompressed_size); 659 644 660 645 #endif /* LINUX_ZSTD_H */
+21 -11
include/linux/zstd_errors.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 13 12 #define ZSTD_ERRORS_H_398273423 14 13 15 14 16 - /*===== dependency =====*/ 17 - #include <linux/types.h> /* size_t */ 18 - 19 - 20 15 /* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ 21 - #define ZSTDERRORLIB_VISIBILITY 22 - #define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY 16 + #define ZSTDERRORLIB_VISIBLE 17 + 18 + #ifndef ZSTDERRORLIB_HIDDEN 19 + # if (__GNUC__ >= 4) && !defined(__MINGW32__) 20 + # define ZSTDERRORLIB_HIDDEN __attribute__ ((visibility ("hidden"))) 21 + # else 22 + # define ZSTDERRORLIB_HIDDEN 23 + # endif 24 + #endif 25 + 26 + #define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBLE 23 27 24 28 /*-********************************************* 25 29 * Error codes list ··· 49 43 ZSTD_error_frameParameter_windowTooLarge = 16, 50 44 ZSTD_error_corruption_detected = 20, 51 45 ZSTD_error_checksum_wrong = 22, 46 + ZSTD_error_literals_headerWrong = 24, 52 47 ZSTD_error_dictionary_corrupted = 30, 53 48 ZSTD_error_dictionary_wrong = 32, 54 49 ZSTD_error_dictionaryCreation_failed = 34, 55 50 ZSTD_error_parameter_unsupported = 40, 51 + ZSTD_error_parameter_combination_unsupported = 41, 56 52 ZSTD_error_parameter_outOfBound = 42, 57 53 ZSTD_error_tableLog_tooLarge = 44, 58 54 ZSTD_error_maxSymbolValue_tooLarge = 46, 59 55 ZSTD_error_maxSymbolValue_tooSmall = 48, 56 + ZSTD_error_cannotProduce_uncompressedBlock = 49, 57 + ZSTD_error_stabilityCondition_notRespected = 50, 60 58 ZSTD_error_stage_wrong = 60, 61 59 ZSTD_error_init_missing = 62, 62 60 ZSTD_error_memory_allocation = 64, ··· 68 58 ZSTD_error_dstSize_tooSmall = 70, 69 59 ZSTD_error_srcSize_wrong = 72, 70 60 ZSTD_error_dstBuffer_null = 74, 61 + ZSTD_error_noForwardProgress_destFull = 80, 62 + 
ZSTD_error_noForwardProgress_inputEmpty = 82, 71 63 /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ 72 64 ZSTD_error_frameIndex_tooLarge = 100, 73 65 ZSTD_error_seekableIO = 102, 74 66 ZSTD_error_dstBuffer_wrong = 104, 75 67 ZSTD_error_srcBuffer_wrong = 105, 68 + ZSTD_error_sequenceProducer_failed = 106, 69 + ZSTD_error_externalSequences_invalid = 107, 76 70 ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ 77 71 } ZSTD_ErrorCode; 78 72 79 - /*! ZSTD_getErrorCode() : 80 - convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, 81 - which can be used to compare with enum list published above */ 82 - ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); 83 73 ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /*< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ 84 74 85 75
+865 -256
include/linux/zstd_lib.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 12 11 #ifndef ZSTD_H_235446 13 12 #define ZSTD_H_235446 14 13 15 - /* ====== Dependency ======*/ 16 - #include <linux/limits.h> /* INT_MAX */ 14 + 15 + /* ====== Dependencies ======*/ 17 16 #include <linux/types.h> /* size_t */ 17 + 18 + #include <linux/zstd_errors.h> /* list of errors */ 19 + #if !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) 20 + #include <linux/limits.h> /* INT_MAX */ 21 + #endif /* ZSTD_STATIC_LINKING_ONLY */ 18 22 19 23 20 24 /* ===== ZSTDLIB_API : control library symbols visibility ===== */ 21 - #ifndef ZSTDLIB_VISIBLE 25 + #define ZSTDLIB_VISIBLE 26 + 27 + #ifndef ZSTDLIB_HIDDEN 22 28 # if (__GNUC__ >= 4) && !defined(__MINGW32__) 23 - # define ZSTDLIB_VISIBLE __attribute__ ((visibility ("default"))) 24 29 # define ZSTDLIB_HIDDEN __attribute__ ((visibility ("hidden"))) 25 30 # else 26 - # define ZSTDLIB_VISIBLE 27 31 # define ZSTDLIB_HIDDEN 28 32 # endif 29 33 #endif 34 + 30 35 #define ZSTDLIB_API ZSTDLIB_VISIBLE 36 + 37 + /* Deprecation warnings : 38 + * Should these warnings be a problem, it is generally possible to disable them, 39 + * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual. 40 + * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS. 
41 + */ 42 + #ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS 43 + # define ZSTD_DEPRECATED(message) /* disable deprecation warnings */ 44 + #else 45 + # if (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__) || defined(__IAR_SYSTEMS_ICC__) 46 + # define ZSTD_DEPRECATED(message) __attribute__((deprecated(message))) 47 + # elif (__GNUC__ >= 3) 48 + # define ZSTD_DEPRECATED(message) __attribute__((deprecated)) 49 + # else 50 + # pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler") 51 + # define ZSTD_DEPRECATED(message) 52 + # endif 53 + #endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */ 31 54 32 55 33 56 /* ***************************************************************************** ··· 90 65 /*------ Version ------*/ 91 66 #define ZSTD_VERSION_MAJOR 1 92 67 #define ZSTD_VERSION_MINOR 5 93 - #define ZSTD_VERSION_RELEASE 2 68 + #define ZSTD_VERSION_RELEASE 7 94 69 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) 95 70 96 71 /*! ZSTD_versionNumber() : ··· 128 103 129 104 130 105 /* ************************************* 131 - * Simple API 106 + * Simple Core API 132 107 ***************************************/ 133 108 /*! ZSTD_compress() : 134 109 * Compresses `src` content as a single zstd compressed frame into already allocated `dst`. 135 - * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. 110 + * NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have 111 + * enough space to successfully compress the data. 136 112 * @return : compressed size written into `dst` (<= `dstCapacity), 137 113 * or an error code if it fails (which can be tested using ZSTD_isError()). */ 138 114 ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, ··· 141 115 int compressionLevel); 142 116 143 117 /*! 
ZSTD_decompress() : 144 - * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. 145 - * `dstCapacity` is an upper bound of originalSize to regenerate. 146 - * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. 147 - * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), 148 - * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ 118 + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. 119 + * Multiple compressed frames can be decompressed at once with this method. 120 + * The result will be the concatenation of all decompressed frames, back to back. 121 + * `dstCapacity` is an upper bound of originalSize to regenerate. 122 + * First frame's decompressed size can be extracted using ZSTD_getFrameContentSize(). 123 + * If maximum upper bound isn't known, prefer using streaming mode to decompress data. 124 + * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), 125 + * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ 149 126 ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, 150 127 const void* src, size_t compressedSize); 151 128 129 + 130 + /*====== Decompression helper functions ======*/ 131 + 152 132 /*! ZSTD_getFrameContentSize() : requires v1.3.0+ 153 - * `src` should point to the start of a ZSTD encoded frame. 154 - * `srcSize` must be at least as large as the frame header. 155 - * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. 156 - * @return : - decompressed size of `src` frame content, if known 157 - * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined 158 - * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) 159 - * note 1 : a 0 return value means the frame is valid but "empty". 
160 - * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. 161 - * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. 162 - * In which case, it's necessary to use streaming mode to decompress data. 163 - * Optionally, application can rely on some implicit limit, 164 - * as ZSTD_decompress() only needs an upper bound of decompressed size. 165 - * (For example, data could be necessarily cut into blocks <= 16 KB). 166 - * note 3 : decompressed size is always present when compression is completed using single-pass functions, 167 - * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). 168 - * note 4 : decompressed size can be very large (64-bits value), 169 - * potentially larger than what local system can handle as a single memory segment. 170 - * In which case, it's necessary to use streaming mode to decompress data. 171 - * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. 172 - * Always ensure return value fits within application's authorized limits. 173 - * Each application can set its own limits. 174 - * note 6 : This function replaces ZSTD_getDecompressedSize() */ 133 + * `src` should point to the start of a ZSTD encoded frame. 134 + * `srcSize` must be at least as large as the frame header. 135 + * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. 136 + * @return : - decompressed size of `src` frame content, if known 137 + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined 138 + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) 139 + * note 1 : a 0 return value means the frame is valid but "empty". 140 + * When invoking this method on a skippable frame, it will return 0. 141 + * note 2 : decompressed size is an optional field, it may not be present (typically in streaming mode). 
142 + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. 143 + * In which case, it's necessary to use streaming mode to decompress data. 144 + * Optionally, application can rely on some implicit limit, 145 + * as ZSTD_decompress() only needs an upper bound of decompressed size. 146 + * (For example, data could be necessarily cut into blocks <= 16 KB). 147 + * note 3 : decompressed size is always present when compression is completed using single-pass functions, 148 + * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). 149 + * note 4 : decompressed size can be very large (64-bits value), 150 + * potentially larger than what local system can handle as a single memory segment. 151 + * In which case, it's necessary to use streaming mode to decompress data. 152 + * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. 153 + * Always ensure return value fits within application's authorized limits. 154 + * Each application can set its own limits. 155 + * note 6 : This function replaces ZSTD_getDecompressedSize() */ 175 156 #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) 176 157 #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) 177 158 ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); 178 159 179 - /*! ZSTD_getDecompressedSize() : 180 - * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). 160 + /*! ZSTD_getDecompressedSize() (obsolete): 161 + * This function is now obsolete, in favor of ZSTD_getFrameContentSize(). 181 162 * Both functions work the same way, but ZSTD_getDecompressedSize() blends 182 163 * "empty", "unknown" and "error" results to the same return value (0), 183 164 * while ZSTD_getFrameContentSize() gives them separate return values. 184 165 * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. 
*/ 166 + ZSTD_DEPRECATED("Replaced by ZSTD_getFrameContentSize") 185 167 ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); 186 168 187 169 /*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+ ··· 197 163 * `srcSize` must be >= first frame size 198 164 * @return : the compressed size of the first frame starting at `src`, 199 165 * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, 200 - * or an error code if input is invalid */ 166 + * or an error code if input is invalid 167 + * Note 1: this method is called _find*() because it's not enough to read the header, 168 + * it may have to scan through the frame's content, to reach its end. 169 + * Note 2: this method also works with Skippable Frames. In which case, 170 + * it returns the size of the complete skippable frame, 171 + * which is always equal to its content size + 8 bytes for headers. */ 201 172 ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); 202 173 203 174 204 - /*====== Helper functions ======*/ 205 - #define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? 
(((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ 206 - ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ 207 - ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ 208 - ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ 209 - ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */ 210 - ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ 211 - ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */ 175 + /*====== Compression helper functions ======*/ 176 + 177 + /*! ZSTD_compressBound() : 178 + * maximum compressed size in worst case single-pass scenario. 179 + * When invoking `ZSTD_compress()`, or any other one-pass compression function, 180 + * it's recommended to provide @dstCapacity >= ZSTD_compressBound(srcSize) 181 + * as it eliminates one potential failure scenario, 182 + * aka not enough room in dst buffer to write the compressed frame. 183 + * Note : ZSTD_compressBound() itself can fail, if @srcSize >= ZSTD_MAX_INPUT_SIZE . 184 + * In which case, ZSTD_compressBound() will return an error code 185 + * which can be tested using ZSTD_isError(). 186 + * 187 + * ZSTD_COMPRESSBOUND() : 188 + * same as ZSTD_compressBound(), but as a macro. 189 + * It can be used to produce constants, which can be useful for static allocation, 190 + * for example to size a static array on stack. 191 + * Will produce constant value 0 if srcSize is too large. 192 + */ 193 + #define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 
0xFF00FF00FF00FF00ULL : 0xFF00FF00U) 194 + #define ZSTD_COMPRESSBOUND(srcSize) (((size_t)(srcSize) >= ZSTD_MAX_INPUT_SIZE) ? 0 : (srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ 195 + ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ 196 + 197 + 198 + /*====== Error helper functions ======*/ 199 + /* ZSTD_isError() : 200 + * Most ZSTD_* functions returning a size_t value can be tested for error, 201 + * using ZSTD_isError(). 202 + * @return 1 if error, 0 otherwise 203 + */ 204 + ZSTDLIB_API unsigned ZSTD_isError(size_t result); /*!< tells if a `size_t` function result is an error code */ 205 + ZSTDLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); /* convert a result into an error code, which can be compared to error enum list */ 206 + ZSTDLIB_API const char* ZSTD_getErrorName(size_t result); /*!< provides readable string from a function result */ 207 + ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */ 208 + ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ 209 + ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */ 212 210 213 211 214 212 /* ************************************* ··· 248 182 ***************************************/ 249 183 /*= Compression context 250 184 * When compressing many times, 251 - * it is recommended to allocate a context just once, 252 - * and re-use it for each successive compression operation. 253 - * This will make workload friendlier for system's memory. 185 + * it is recommended to allocate a compression context just once, 186 + * and reuse it for each successive compression operation. 
187 + * This will make the workload easier for system's memory. 254 188 * Note : re-using context is just a speed / resource optimization. 255 189 * It doesn't change the compression ratio, which remains identical. 256 - * Note 2 : In multi-threaded environments, 257 - * use one different context per thread for parallel execution. 190 + * Note 2: For parallel execution in multi-threaded environments, 191 + * use one different context per thread . 258 192 */ 259 193 typedef struct ZSTD_CCtx_s ZSTD_CCtx; 260 194 ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); 261 - ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer */ 195 + ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* compatible with NULL pointer */ 262 196 263 197 /*! ZSTD_compressCCtx() : 264 198 * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. 265 - * Important : in order to behave similarly to `ZSTD_compress()`, 266 - * this function compresses at requested compression level, 267 - * __ignoring any other parameter__ . 199 + * Important : in order to mirror `ZSTD_compress()` behavior, 200 + * this function compresses at the requested compression level, 201 + * __ignoring any other advanced parameter__ . 268 202 * If any advanced parameter was set using the advanced API, 269 - * they will all be reset. Only `compressionLevel` remains. 203 + * they will all be reset. Only @compressionLevel remains. 270 204 */ 271 205 ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, 272 206 void* dst, size_t dstCapacity, ··· 276 210 /*= Decompression context 277 211 * When decompressing many times, 278 212 * it is recommended to allocate a context only once, 279 - * and re-use it for each successive compression operation. 213 + * and reuse it for each successive compression operation. 280 214 * This will make workload friendlier for system's memory. 281 215 * Use one context per thread for parallel execution. */ 282 216 typedef struct ZSTD_DCtx_s ZSTD_DCtx; ··· 286 220 /*! 
ZSTD_decompressDCtx() : 287 221 * Same as ZSTD_decompress(), 288 222 * requires an allocated ZSTD_DCtx. 289 - * Compatible with sticky parameters. 223 + * Compatible with sticky parameters (see below). 290 224 */ 291 225 ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, 292 226 void* dst, size_t dstCapacity, ··· 302 236 * using ZSTD_CCtx_set*() functions. 303 237 * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. 304 238 * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! 305 - * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . 239 + * __They do not apply to one-shot variants such as ZSTD_compressCCtx()__ . 306 240 * 307 241 * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). 308 242 * 309 243 * This API supersedes all other "advanced" API entry points in the experimental section. 310 - * In the future, we expect to remove from experimental API entry points which are redundant with this API. 244 + * In the future, we expect to remove API entry points from experimental which are redundant with this API. 311 245 */ 312 246 313 247 ··· 390 324 * The higher the value of selected strategy, the more complex it is, 391 325 * resulting in stronger and slower compression. 392 326 * Special: value 0 means "use default strategy". */ 327 + 328 + ZSTD_c_targetCBlockSize=130, /* v1.5.6+ 329 + * Attempts to fit compressed block size into approximately targetCBlockSize. 330 + * Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX. 331 + * Note that it's not a guarantee, just a convergence target (default:0). 332 + * No target when targetCBlockSize == 0. 333 + * This is helpful in low bandwidth streaming environments to improve end-to-end latency, 334 + * when a client can make use of partial documents (a prominent example being Chrome). 335 + * Note: this parameter is stable since v1.5.6. 
336 + * It was present as an experimental parameter in earlier versions, 337 + * but it's not recommended using it with earlier library versions 338 + * due to massive performance regressions. 339 + */ 393 340 /* LDM mode parameters */ 394 341 ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. 395 342 * This parameter is designed to improve compression ratio ··· 482 403 * ZSTD_c_forceMaxWindow 483 404 * ZSTD_c_forceAttachDict 484 405 * ZSTD_c_literalCompressionMode 485 - * ZSTD_c_targetCBlockSize 486 406 * ZSTD_c_srcSizeHint 487 407 * ZSTD_c_enableDedicatedDictSearch 488 408 * ZSTD_c_stableInBuffer 489 409 * ZSTD_c_stableOutBuffer 490 410 * ZSTD_c_blockDelimiters 491 411 * ZSTD_c_validateSequences 492 - * ZSTD_c_useBlockSplitter 412 + * ZSTD_c_blockSplitterLevel 413 + * ZSTD_c_splitAfterSequences 493 414 * ZSTD_c_useRowMatchFinder 415 + * ZSTD_c_prefetchCDictTables 416 + * ZSTD_c_enableSeqProducerFallback 417 + * ZSTD_c_maxBlockSize 494 418 * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. 495 419 * note : never ever use experimentalParam? names directly; 496 420 * also, the enums values themselves are unstable and can still change. 
··· 503 421 ZSTD_c_experimentalParam3=1000, 504 422 ZSTD_c_experimentalParam4=1001, 505 423 ZSTD_c_experimentalParam5=1002, 506 - ZSTD_c_experimentalParam6=1003, 424 + /* was ZSTD_c_experimentalParam6=1003; is now ZSTD_c_targetCBlockSize */ 507 425 ZSTD_c_experimentalParam7=1004, 508 426 ZSTD_c_experimentalParam8=1005, 509 427 ZSTD_c_experimentalParam9=1006, ··· 512 430 ZSTD_c_experimentalParam12=1009, 513 431 ZSTD_c_experimentalParam13=1010, 514 432 ZSTD_c_experimentalParam14=1011, 515 - ZSTD_c_experimentalParam15=1012 433 + ZSTD_c_experimentalParam15=1012, 434 + ZSTD_c_experimentalParam16=1013, 435 + ZSTD_c_experimentalParam17=1014, 436 + ZSTD_c_experimentalParam18=1015, 437 + ZSTD_c_experimentalParam19=1016, 438 + ZSTD_c_experimentalParam20=1017 516 439 } ZSTD_cParameter; 517 440 518 441 typedef struct { ··· 580 493 * They will be used to compress next frame. 581 494 * Resetting session never fails. 582 495 * - The parameters : changes all parameters back to "default". 583 - * This removes any reference to any dictionary too. 496 + * This also removes any reference to any dictionary or external sequence producer. 584 497 * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) 585 498 * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) 586 499 * - Both : similar to resetting the session, followed by resetting parameters. ··· 589 502 590 503 /*! ZSTD_compress2() : 591 504 * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. 505 + * (note that this entry point doesn't even expose a compression level parameter). 592 506 * ZSTD_compress2() always starts a new frame. 593 507 * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. 
594 508 * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() 595 509 * - The function is always blocking, returns when compression is completed. 596 - * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. 510 + * NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have 511 + * enough space to successfully compress the data, though it is possible it fails for other reasons. 597 512 * @return : compressed size written into `dst` (<= `dstCapacity), 598 513 * or an error code if it fails (which can be tested using ZSTD_isError()). 599 514 */ ··· 632 543 * ZSTD_d_stableOutBuffer 633 544 * ZSTD_d_forceIgnoreChecksum 634 545 * ZSTD_d_refMultipleDDicts 546 + * ZSTD_d_disableHuffmanAssembly 547 + * ZSTD_d_maxBlockSize 635 548 * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. 636 549 * note : never ever use experimentalParam? names directly 637 550 */ 638 551 ZSTD_d_experimentalParam1=1000, 639 552 ZSTD_d_experimentalParam2=1001, 640 553 ZSTD_d_experimentalParam3=1002, 641 - ZSTD_d_experimentalParam4=1003 554 + ZSTD_d_experimentalParam4=1003, 555 + ZSTD_d_experimentalParam5=1004, 556 + ZSTD_d_experimentalParam6=1005 642 557 643 558 } ZSTD_dParameter; 644 559 ··· 697 604 * A ZSTD_CStream object is required to track streaming operation. 698 605 * Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. 699 606 * ZSTD_CStream objects can be reused multiple times on consecutive compression operations. 700 - * It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. 607 + * It is recommended to reuse ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. 701 608 * 702 609 * For parallel execution, use one separate ZSTD_CStream per thread. 
703 610 * 704 611 * note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. 705 612 * 706 613 * Parameters are sticky : when starting a new compression on the same context, 707 - * it will re-use the same sticky parameters as previous compression session. 614 + * it will reuse the same sticky parameters as previous compression session. 708 615 * When in doubt, it's recommended to fully initialize the context before usage. 709 616 * Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), 710 617 * ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to ··· 793 700 * only ZSTD_e_end or ZSTD_e_flush operations are allowed. 794 701 * Before starting a new compression job, or changing compression parameters, 795 702 * it is required to fully flush internal buffers. 703 + * - note: if an operation ends with an error, it may leave @cctx in an undefined state. 704 + * Therefore, it's UB to invoke ZSTD_compressStream2() or ZSTD_compressStream() on such a state. 705 + * In order to be re-employed after an error, a state must be reset, 706 + * which can be done explicitly (ZSTD_CCtx_reset()), 707 + * or is sometimes implied by methods starting a new compression job (ZSTD_initCStream(), ZSTD_compressCCtx()) 796 708 */ 797 709 ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, 798 710 ZSTD_outBuffer* output, ··· 826 728 * This following is a legacy streaming API, available since v1.0+ . 827 729 * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). 828 730 * It is redundant, but remains fully supported. 829 - * Streaming in combination with advanced parameters and dictionary compression 830 - * can only be used through the new API. 831 731 ******************************************************************************/ 832 732 833 733 /*! 
··· 834 738 * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); 835 739 * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) 836 740 * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); 741 + * 742 + * Note that ZSTD_initCStream() clears any previously set dictionary. Use the new API 743 + * to compress with a dictionary. 837 744 */ 838 745 ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); 839 746 /*! ··· 857 758 * 858 759 * A ZSTD_DStream object is required to track streaming operations. 859 760 * Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. 860 - * ZSTD_DStream objects can be re-used multiple times. 761 + * ZSTD_DStream objects can be re-employed multiple times. 861 762 * 862 763 * Use ZSTD_initDStream() to start a new decompression operation. 863 764 * @return : recommended first input size ··· 867 768 * The function will update both `pos` fields. 868 769 * If `input.pos < input.size`, some input has not been consumed. 869 770 * It's up to the caller to present again remaining data. 771 + * 870 772 * The function tries to flush all data decoded immediately, respecting output buffer size. 871 773 * If `output.pos < output.size`, decoder has flushed everything it could. 872 - * But if `output.pos == output.size`, there might be some data left within internal buffers., 774 + * 775 + * However, when `output.pos == output.size`, it's more difficult to know. 776 + * If @return > 0, the frame is not complete, meaning 777 + * either there is still some data left to flush within internal buffers, 778 + * or there is more input to read to complete the frame (or both). 873 779 * In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. 874 780 * Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. 
875 781 * @return : 0 when a frame is completely decoded and fully flushed, 876 782 * or an error code, which can be tested using ZSTD_isError(), 877 783 * or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : 878 784 * the return value is a suggested next input size (just a hint for better latency) 879 - * that will never request more than the remaining frame size. 785 + * that will never request more than the remaining content of the compressed frame. 880 786 * *******************************************************************************/ 881 787 882 788 typedef ZSTD_DCtx ZSTD_DStream; /*< DCtx and DStream are now effectively same object (>= v1.3.0) */ ··· 892 788 893 789 /*===== Streaming decompression functions =====*/ 894 790 895 - /* This function is redundant with the advanced API and equivalent to: 791 + /*! ZSTD_initDStream() : 792 + * Initialize/reset DStream state for new decompression operation. 793 + * Call before new decompression operation using same DStream. 896 794 * 795 + * Note : This function is redundant with the advanced API and equivalent to: 897 796 * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); 898 797 * ZSTD_DCtx_refDDict(zds, NULL); 899 798 */ 900 799 ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); 901 800 801 + /*! ZSTD_decompressStream() : 802 + * Streaming decompression function. 803 + * Call repetitively to consume full input updating it as necessary. 804 + * Function will update both input and output `pos` fields exposing current state via these fields: 805 + * - `input.pos < input.size`, some input remaining and caller should provide remaining input 806 + * on the next call. 807 + * - `output.pos < output.size`, decoder flushed internal output buffer. 
808 + * - `output.pos == output.size`, unflushed data potentially present in the internal buffers, 809 + * check ZSTD_decompressStream() @return value, 810 + * if > 0, invoke it again to flush remaining data to output. 811 + * Note : with no additional input, amount of data flushed <= ZSTD_BLOCKSIZE_MAX. 812 + * 813 + * @return : 0 when a frame is completely decoded and fully flushed, 814 + * or an error code, which can be tested using ZSTD_isError(), 815 + * or any other value > 0, which means there is some decoding or flushing to do to complete current frame. 816 + * 817 + * Note: when an operation returns with an error code, the @zds state may be left in undefined state. 818 + * It's UB to invoke `ZSTD_decompressStream()` on such a state. 819 + * In order to re-use such a state, it must be first reset, 820 + * which can be done explicitly (`ZSTD_DCtx_reset()`), 821 + * or is implied for operations starting some new decompression job (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`) 822 + */ 902 823 ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); 903 824 904 825 ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ ··· 1042 913 * If @return == 0, the dictID could not be decoded. 1043 914 * This could for one of the following reasons : 1044 915 * - The frame does not require a dictionary to be decoded (most common case). 1045 - * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. 916 + * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden piece of information. 1046 917 * Note : this use case also happens when using a non-conformant dictionary. 1047 918 * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). 1048 919 * - This is not a Zstandard frame. 
··· 1054 925 * Advanced dictionary and prefix API (Requires v1.4.0+) 1055 926 * 1056 927 * This API allows dictionaries to be used with ZSTD_compress2(), 1057 - * ZSTD_compressStream2(), and ZSTD_decompressDCtx(). Dictionaries are sticky, and 1058 - * only reset with the context is reset with ZSTD_reset_parameters or 1059 - * ZSTD_reset_session_and_parameters. Prefixes are single-use. 928 + * ZSTD_compressStream2(), and ZSTD_decompressDCtx(). 929 + * Dictionaries are sticky, they remain valid when same context is reused, 930 + * they only reset when the context is reset 931 + * with ZSTD_reset_parameters or ZSTD_reset_session_and_parameters. 932 + * In contrast, Prefixes are single-use. 1060 933 ******************************************************************************/ 1061 934 1062 935 ··· 1068 937 * @result : 0, or an error code (which can be tested with ZSTD_isError()). 1069 938 * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, 1070 939 * meaning "return to no-dictionary mode". 1071 - * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. 1072 - * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). 940 + * Note 1 : Dictionary is sticky, it will be used for all future compressed frames, 941 + * until parameters are reset, a new dictionary is loaded, or the dictionary 942 + * is explicitly invalidated by loading a NULL dictionary. 1073 943 * Note 2 : Loading a dictionary involves building tables. 1074 944 * It's also a CPU consuming operation, with non-negligible impact on latency. 1075 945 * Tables are dependent on compression parameters, and for this reason, ··· 1079 947 * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. 1080 948 * In such a case, dictionary buffer must outlive its users. 1081 949 * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() 1082 - * to precisely select how dictionary content must be interpreted. 
*/ 950 + * to precisely select how dictionary content must be interpreted. 951 + * Note 5 : This method does not benefit from LDM (long distance mode). 952 + * If you want to employ LDM on some large dictionary content, 953 + * prefer employing ZSTD_CCtx_refPrefix() described below. 954 + */ 1083 955 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); 1084 956 1085 957 /*! ZSTD_CCtx_refCDict() : Requires v1.4.0+ 1086 - * Reference a prepared dictionary, to be used for all next compressed frames. 958 + * Reference a prepared dictionary, to be used for all future compressed frames. 1087 959 * Note that compression parameters are enforced from within CDict, 1088 960 * and supersede any compression parameter previously set within CCtx. 1089 961 * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. ··· 1106 970 * Decompression will need same prefix to properly regenerate data. 1107 971 * Compressing with a prefix is similar in outcome as performing a diff and compressing it, 1108 972 * but performs much faster, especially during decompression (compression speed is tunable with compression level). 973 + * This method is compatible with LDM (long distance mode). 1109 974 * @result : 0, or an error code (which can be tested with ZSTD_isError()). 1110 975 * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary 1111 976 * Note 1 : Prefix buffer is referenced. It **must** outlive compression. ··· 1123 986 const void* prefix, size_t prefixSize); 1124 987 1125 988 /*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+ 1126 - * Create an internal DDict from dict buffer, 1127 - * to be used to decompress next frames. 1128 - * The dictionary remains valid for all future frames, until explicitly invalidated. 989 + * Create an internal DDict from dict buffer, to be used to decompress all future frames. 
990 + * The dictionary remains valid for all future frames, until explicitly invalidated, or 991 + * a new dictionary is loaded. 1129 992 * @result : 0, or an error code (which can be tested with ZSTD_isError()). 1130 993 * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, 1131 994 * meaning "return to no-dictionary mode". ··· 1149 1012 * The memory for the table is allocated on the first call to refDDict, and can be 1150 1013 * freed with ZSTD_freeDCtx(). 1151 1014 * 1015 + * If called with ZSTD_d_refMultipleDDicts disabled (the default), only one dictionary 1016 + * will be managed, and referencing a dictionary effectively "discards" any previous one. 1017 + * 1152 1018 * @result : 0, or an error code (which can be tested with ZSTD_isError()). 1153 - * Note 1 : Currently, only one dictionary can be managed. 1154 - * Referencing a new dictionary effectively "discards" any previous one. 1155 1019 * Special: referencing a NULL DDict means "return to no-dictionary mode". 1156 1020 * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. 1157 1021 */ ··· 1189 1051 ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); 1190 1052 ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); 1191 1053 1054 + 1192 1055 #endif /* ZSTD_H_235446 */ 1193 1056 1194 1057 ··· 1205 1066 #if !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) 1206 1067 #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY 1207 1068 1069 + 1208 1070 /* This can be overridden externally to hide static symbols. */ 1209 1071 #ifndef ZSTDLIB_STATIC_API 1210 1072 #define ZSTDLIB_STATIC_API ZSTDLIB_VISIBLE 1211 1073 #endif 1212 - 1213 - /* Deprecation warnings : 1214 - * Should these warnings be a problem, it is generally possible to disable them, 1215 - * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual. 1216 - * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS. 
1217 - */ 1218 - #ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS 1219 - # define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API /* disable deprecation warnings */ 1220 - #else 1221 - # if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) || defined(__clang__) 1222 - # define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated(message))) 1223 - # elif (__GNUC__ >= 3) 1224 - # define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated)) 1225 - # else 1226 - # pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler") 1227 - # define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API 1228 - # endif 1229 - #endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */ 1230 1074 1231 1075 /* ************************************************************************************** 1232 1076 * experimental API (static linking only) ··· 1245 1123 #define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ 1246 1124 #define ZSTD_STRATEGY_MIN ZSTD_fast 1247 1125 #define ZSTD_STRATEGY_MAX ZSTD_btultra2 1126 + #define ZSTD_BLOCKSIZE_MAX_MIN (1 << 10) /* The minimum valid max blocksize. Maximum blocksizes smaller than this make compressBound() inaccurate. */ 1248 1127 1249 1128 1250 1129 #define ZSTD_OVERLAPLOG_MIN 0 ··· 1269 1146 #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) 1270 1147 1271 1148 /* Advanced parameter bounds */ 1272 - #define ZSTD_TARGETCBLOCKSIZE_MIN 64 1149 + #define ZSTD_TARGETCBLOCKSIZE_MIN 1340 /* suitable to fit into an ethernet / wifi / 4G transport frame */ 1273 1150 #define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX 1274 1151 #define ZSTD_SRCSIZEHINT_MIN 0 1275 1152 #define ZSTD_SRCSIZEHINT_MAX INT_MAX ··· 1311 1188 * 1312 1189 * Note: This field is optional. 
ZSTD_generateSequences() will calculate the value of 1313 1190 * 'rep', but repeat offsets do not necessarily need to be calculated from an external 1314 - * sequence provider's perspective. For example, ZSTD_compressSequences() does not 1191 + * sequence provider perspective. For example, ZSTD_compressSequences() does not 1315 1192 * use this 'rep' field at all (as of now). 1316 1193 */ 1317 1194 } ZSTD_Sequence; ··· 1416 1293 } ZSTD_literalCompressionMode_e; 1417 1294 1418 1295 typedef enum { 1419 - /* Note: This enum controls features which are conditionally beneficial. Zstd typically will make a final 1420 - * decision on whether or not to enable the feature (ZSTD_ps_auto), but setting the switch to ZSTD_ps_enable 1421 - * or ZSTD_ps_disable allow for a force enable/disable the feature. 1296 + /* Note: This enum controls features which are conditionally beneficial. 1297 + * Zstd can take a decision on whether or not to enable the feature (ZSTD_ps_auto), 1298 + * but setting the switch to ZSTD_ps_enable or ZSTD_ps_disable force enable/disable the feature. 1422 1299 */ 1423 1300 ZSTD_ps_auto = 0, /* Let the library automatically determine whether the feature shall be enabled */ 1424 1301 ZSTD_ps_enable = 1, /* Force-enable the feature */ 1425 1302 ZSTD_ps_disable = 2 /* Do not use the feature */ 1426 - } ZSTD_paramSwitch_e; 1303 + } ZSTD_ParamSwitch_e; 1304 + #define ZSTD_paramSwitch_e ZSTD_ParamSwitch_e /* old name */ 1427 1305 1428 1306 /* ************************************* 1429 - * Frame size functions 1307 + * Frame header and size functions 1430 1308 ***************************************/ 1431 1309 1432 1310 /*! ZSTD_findDecompressedSize() : ··· 1469 1345 ZSTDLIB_STATIC_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); 1470 1346 1471 1347 /*! ZSTD_frameHeaderSize() : 1472 - * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. 1348 + * srcSize must be large enough, aka >= ZSTD_FRAMEHEADERSIZE_PREFIX. 
1473 1349 * @return : size of the Frame Header, 1474 1350 * or an error code (if srcSize is too small) */ 1475 1351 ZSTDLIB_STATIC_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); 1476 1352 1353 + typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_FrameType_e; 1354 + #define ZSTD_frameType_e ZSTD_FrameType_e /* old name */ 1355 + typedef struct { 1356 + unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ 1357 + unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ 1358 + unsigned blockSizeMax; 1359 + ZSTD_FrameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ 1360 + unsigned headerSize; 1361 + unsigned dictID; /* for ZSTD_skippableFrame, contains the skippable magic variant [0-15] */ 1362 + unsigned checksumFlag; 1363 + unsigned _reserved1; 1364 + unsigned _reserved2; 1365 + } ZSTD_FrameHeader; 1366 + #define ZSTD_frameHeader ZSTD_FrameHeader /* old name */ 1367 + 1368 + /*! ZSTD_getFrameHeader() : 1369 + * decode Frame Header into `zfhPtr`, or requires larger `srcSize`. 1370 + * @return : 0 => header is complete, `zfhPtr` is correctly filled, 1371 + * >0 => `srcSize` is too small, @return value is the wanted `srcSize` amount, `zfhPtr` is not filled, 1372 + * or an error code, which can be tested using ZSTD_isError() */ 1373 + ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize); 1374 + /*! ZSTD_getFrameHeader_advanced() : 1375 + * same as ZSTD_getFrameHeader(), 1376 + * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ 1377 + ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); 1378 + 1379 + /*! ZSTD_decompressionMargin() : 1380 + * Zstd supports in-place decompression, where the input and output buffers overlap. 
1381 + * In this case, the output buffer must be at least (Margin + Output_Size) bytes large, 1382 + * and the input buffer must be at the end of the output buffer. 1383 + * 1384 + * _______________________ Output Buffer ________________________ 1385 + * | | 1386 + * | ____ Input Buffer ____| 1387 + * | | | 1388 + * v v v 1389 + * |---------------------------------------|-----------|----------| 1390 + * ^ ^ ^ 1391 + * |___________________ Output_Size ___________________|_ Margin _| 1392 + * 1393 + * NOTE: See also ZSTD_DECOMPRESSION_MARGIN(). 1394 + * NOTE: This applies only to single-pass decompression through ZSTD_decompress() or 1395 + * ZSTD_decompressDCtx(). 1396 + * NOTE: This function supports multi-frame input. 1397 + * 1398 + * @param src The compressed frame(s) 1399 + * @param srcSize The size of the compressed frame(s) 1400 + * @returns The decompression margin or an error that can be checked with ZSTD_isError(). 1401 + */ 1402 + ZSTDLIB_STATIC_API size_t ZSTD_decompressionMargin(const void* src, size_t srcSize); 1403 + 1404 + /*! ZSTD_DECOMPRESSION_MARGIN() : 1405 + * Similar to ZSTD_decompressionMargin(), but instead of computing the margin from 1406 + * the compressed frame, compute it from the original size and the blockSize. 1407 + * See ZSTD_decompressionMargin() for details. 1408 + * 1409 + * WARNING: This macro does not support multi-frame input, the input must be a single 1410 + * zstd frame. If you need that support use the function, or implement it yourself. 1411 + * 1412 + * @param originalSize The original uncompressed size of the data. 1413 + * @param blockSize The block size == MIN(windowSize, ZSTD_BLOCKSIZE_MAX). 1414 + * Unless you explicitly set the windowLog smaller than 1415 + * ZSTD_BLOCKSIZELOG_MAX you can just use ZSTD_BLOCKSIZE_MAX. 1416 + */ 1417 + #define ZSTD_DECOMPRESSION_MARGIN(originalSize, blockSize) ((size_t)( \ 1418 + ZSTD_FRAMEHEADERSIZE_MAX /* Frame header */ + \ 1419 + 4 /* checksum */ + \ 1420 + ((originalSize) == 0 ? 
0 : 3 * (((originalSize) + (blockSize) - 1) / blockSize)) /* 3 bytes per block */ + \ 1421 + (blockSize) /* One block of margin */ \ 1422 + )) 1423 + 1477 1424 typedef enum { 1478 - ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */ 1479 - ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */ 1480 - } ZSTD_sequenceFormat_e; 1425 + ZSTD_sf_noBlockDelimiters = 0, /* ZSTD_Sequence[] has no block delimiters, just sequences */ 1426 + ZSTD_sf_explicitBlockDelimiters = 1 /* ZSTD_Sequence[] contains explicit block delimiters */ 1427 + } ZSTD_SequenceFormat_e; 1428 + #define ZSTD_sequenceFormat_e ZSTD_SequenceFormat_e /* old name */ 1429 + 1430 + /*! ZSTD_sequenceBound() : 1431 + * `srcSize` : size of the input buffer 1432 + * @return : upper-bound for the number of sequences that can be generated 1433 + * from a buffer of srcSize bytes 1434 + * 1435 + * note : returns number of sequences - to get bytes, multiply by sizeof(ZSTD_Sequence). 1436 + */ 1437 + ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize); 1481 1438 1482 1439 /*! ZSTD_generateSequences() : 1483 - * Generate sequences using ZSTD_compress2, given a source buffer. 1440 + * WARNING: This function is meant for debugging and informational purposes ONLY! 1441 + * Its implementation is flawed, and it will be deleted in a future version. 1442 + * It is not guaranteed to succeed, as there are several cases where it will give 1443 + * up and fail. You should NOT use this function in production code. 1444 + * 1445 + * This function is deprecated, and will be removed in a future version. 1446 + * 1447 + * Generate sequences using ZSTD_compress2(), given a source buffer. 1448 + * 1449 + * @param zc The compression context to be used for ZSTD_compress2(). Set any 1450 + * compression parameters you need on this context. 
1451 + * @param outSeqs The output sequences buffer of size @p outSeqsCapacity 1452 + * @param outSeqsCapacity The size of the output sequences buffer. 1453 + * ZSTD_sequenceBound(srcSize) is an upper bound on the number 1454 + * of sequences that can be generated. 1455 + * @param src The source buffer to generate sequences from of size @p srcSize. 1456 + * @param srcSize The size of the source buffer. 1484 1457 * 1485 1458 * Each block will end with a dummy sequence 1486 1459 * with offset == 0, matchLength == 0, and litLength == length of last literals. 1487 1460 * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) 1488 1461 * simply acts as a block delimiter. 1489 1462 * 1490 - * zc can be used to insert custom compression params. 1491 - * This function invokes ZSTD_compress2 1492 - * 1493 - * The output of this function can be fed into ZSTD_compressSequences() with CCtx 1494 - * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters 1495 - * @return : number of sequences generated 1463 + * @returns The number of sequences generated, necessarily less than 1464 + * ZSTD_sequenceBound(srcSize), or an error code that can be checked 1465 + * with ZSTD_isError(). 1496 1466 */ 1497 - 1498 - ZSTDLIB_STATIC_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, 1499 - size_t outSeqsSize, const void* src, size_t srcSize); 1467 + ZSTD_DEPRECATED("For debugging only, will be replaced by ZSTD_extractSequences()") 1468 + ZSTDLIB_STATIC_API size_t 1469 + ZSTD_generateSequences(ZSTD_CCtx* zc, 1470 + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, 1471 + const void* src, size_t srcSize); 1500 1472 1501 1473 /*! ZSTD_mergeBlockDelimiters() : 1502 1474 * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals ··· 1608 1388 ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); 1609 1389 1610 1390 /*! 
ZSTD_compressSequences() : 1611 - * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst. 1612 - * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.) 1391 + * Compress an array of ZSTD_Sequence, associated with @src buffer, into dst. 1392 + * @src contains the entire input (not just the literals). 1393 + * If @srcSize > sum(sequence.length), the remaining bytes are considered all literals 1394 + * If a dictionary is included, then the cctx should reference the dict (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.). 1613 1395 * The entire source is compressed into a single frame. 1614 1396 * 1615 1397 * The compression behavior changes based on cctx params. In particular: ··· 1620 1398 * the block size derived from the cctx, and sequences may be split. This is the default setting. 1621 1399 * 1622 1400 * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain 1623 - * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. 1401 + * valid block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. 1624 1402 * 1625 - * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined 1626 - * behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for 1627 - * specifics regarding offset/matchlength requirements) then the function will bail out and return an error. 1403 + * When ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, it's possible to decide generating repcodes 1404 + * using the advanced parameter ZSTD_c_repcodeResolution. Repcodes will improve compression ratio, though the benefit 1405 + * can vary greatly depending on Sequences. 
On the other hand, repcode resolution is an expensive operation. 1406 + * By default, it's disabled at low (<10) compression levels, and enabled above the threshold (>=10). 1407 + * ZSTD_c_repcodeResolution makes it possible to directly manage this processing in either direction. 1408 + * 1409 + * If ZSTD_c_validateSequences == 0, this function blindly accepts the Sequences provided. Invalid Sequences cause undefined 1410 + * behavior. If ZSTD_c_validateSequences == 1, then the function will detect invalid Sequences (see doc/zstd_compression_format.md for 1411 + * specifics regarding offset/matchlength requirements) and then bail out and return an error. 1628 1412 * 1629 1413 * In addition to the two adjustable experimental params, there are other important cctx params. 1630 1414 * - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN. ··· 1638 1410 * - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset 1639 1411 * is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md 1640 1412 * 1641 - * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused. 1642 - * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly, 1643 - * and cannot emit an RLE block that disagrees with the repcode history 1644 - * @return : final compressed size or a ZSTD error. 1413 + * Note: Repcodes are, as of now, always re-calculated within this function, ZSTD_Sequence.rep is effectively unused. 1414 + * Dev Note: Once ability to ingest repcodes becomes available, the explicit block delims mode must respect those repcodes exactly, 1415 + * and cannot emit an RLE block that disagrees with the repcode history. 
1416 + * @return : final compressed size, or a ZSTD error code. 1645 1417 */ 1646 - ZSTDLIB_STATIC_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, 1647 - const ZSTD_Sequence* inSeqs, size_t inSeqsSize, 1648 - const void* src, size_t srcSize); 1418 + ZSTDLIB_STATIC_API size_t 1419 + ZSTD_compressSequences(ZSTD_CCtx* cctx, 1420 + void* dst, size_t dstCapacity, 1421 + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, 1422 + const void* src, size_t srcSize); 1423 + 1424 + 1425 + /*! ZSTD_compressSequencesAndLiterals() : 1426 + * This is a variant of ZSTD_compressSequences() which, 1427 + * instead of receiving (src,srcSize) as input parameter, receives (literals,litSize), 1428 + * aka all the literals, already extracted and laid out into a single continuous buffer. 1429 + * This can be useful if the process generating the sequences also happens to generate the buffer of literals, 1430 + * thus skipping an extraction + caching stage. 1431 + * It's a speed optimization, useful when the right conditions are met, 1432 + * but it also features the following limitations: 1433 + * - Only supports explicit delimiter mode 1434 + * - Currently does not support Sequences validation (so input Sequences are trusted) 1435 + * - Not compatible with frame checksum, which must be disabled 1436 + * - If any block is incompressible, will fail and return an error 1437 + * - @litSize must be == sum of all @.litLength fields in @inSeqs. Any discrepancy will generate an error. 1438 + * - @litBufCapacity is the size of the underlying buffer into which literals are written, starting at address @literals. 1439 + * @litBufCapacity must be at least 8 bytes larger than @litSize. 1440 + * - @decompressedSize must be correct, and correspond to the sum of all Sequences. Any discrepancy will generate an error. 1441 + * @return : final compressed size, or a ZSTD error code. 
1442 + */ 1443 + ZSTDLIB_STATIC_API size_t 1444 + ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx, 1445 + void* dst, size_t dstCapacity, 1446 + const ZSTD_Sequence* inSeqs, size_t nbSequences, 1447 + const void* literals, size_t litSize, size_t litBufCapacity, 1448 + size_t decompressedSize); 1649 1449 1650 1450 1651 1451 /*! ZSTD_writeSkippableFrame() : ··· 1681 1425 * 1682 1426 * Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number, 1683 1427 * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15. 1684 - * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so 1685 - * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. 1428 + * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, 1429 + * so the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. 1686 1430 * 1687 1431 * Returns an error if destination buffer is not large enough, if the source size is not representable 1688 1432 * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid). ··· 1690 1434 * @return : number of bytes written or a ZSTD error. 1691 1435 */ 1692 1436 ZSTDLIB_STATIC_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, 1693 - const void* src, size_t srcSize, unsigned magicVariant); 1437 + const void* src, size_t srcSize, 1438 + unsigned magicVariant); 1694 1439 1695 1440 /*! ZSTD_readSkippableFrame() : 1696 - * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer. 1441 + * Retrieves the content of a zstd skippable frame starting at @src, and writes it to @dst buffer. 1697 1442 * 1698 - * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written, 1699 - * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. 
This can be NULL if the caller is not interested 1700 - * in the magicVariant. 1443 + * The parameter @magicVariant will receive the magicVariant that was supplied when the frame was written, 1444 + * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. 1445 + * This can be NULL if the caller is not interested in the magicVariant. 1701 1446 * 1702 1447 * Returns an error if destination buffer is not large enough, or if the frame is not skippable. 1703 1448 * 1704 1449 * @return : number of bytes written or a ZSTD error. 1705 1450 */ 1706 - ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant, 1707 - const void* src, size_t srcSize); 1451 + ZSTDLIB_STATIC_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, 1452 + unsigned* magicVariant, 1453 + const void* src, size_t srcSize); 1708 1454 1709 1455 /*! ZSTD_isSkippableFrame() : 1710 1456 * Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame. 1711 1457 */ 1712 - ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size); 1458 + ZSTDLIB_STATIC_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size); 1713 1459 1714 1460 1715 1461 ··· 1722 1464 /*! ZSTD_estimate*() : 1723 1465 * These functions make it possible to estimate memory usage 1724 1466 * of a future {D,C}Ctx, before its creation. 1467 + * This is useful in combination with ZSTD_initStatic(), 1468 + * which makes it possible to employ a static buffer for ZSTD_CCtx* state. 1725 1469 * 1726 1470 * ZSTD_estimateCCtxSize() will provide a memory budget large enough 1727 - * for any compression level up to selected one. 1728 - * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate 1729 - * does not include space for a window buffer. 1730 - * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. 
1471 + * to compress data of any size using one-shot compression ZSTD_compressCCtx() or ZSTD_compress2() 1472 + * associated with any compression level up to max specified one. 1731 1473 * The estimate will assume the input may be arbitrarily large, 1732 1474 * which is the worst case. 1733 1475 * 1476 + * Note that the size estimation is specific for one-shot compression, 1477 + * it is not valid for streaming (see ZSTD_estimateCStreamSize*()) 1478 + * nor other potential ways of using a ZSTD_CCtx* state. 1479 + * 1734 1480 * When srcSize can be bound by a known and rather "small" value, 1735 - * this fact can be used to provide a tighter estimation 1736 - * because the CCtx compression context will need less memory. 1737 - * This tighter estimation can be provided by more advanced functions 1481 + * this knowledge can be used to provide a tighter budget estimation 1482 + * because the ZSTD_CCtx* state will need less memory for small inputs. 1483 + * This tighter estimation can be provided by employing more advanced functions 1738 1484 * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), 1739 1485 * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). 1740 1486 * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. 1741 1487 * 1742 - * Note 2 : only single-threaded compression is supported. 1488 + * Note : only single-threaded compression is supported. 1743 1489 * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. 
1744 1490 */ 1745 - ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int compressionLevel); 1491 + ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int maxCompressionLevel); 1746 1492 ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); 1747 1493 ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); 1748 1494 ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void); 1749 1495 1750 1496 /*! ZSTD_estimateCStreamSize() : 1751 - * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. 1752 - * It will also consider src size to be arbitrarily "large", which is worst case. 1497 + * ZSTD_estimateCStreamSize() will provide a memory budget large enough for streaming compression 1498 + * using any compression level up to the max specified one. 1499 + * It will also consider src size to be arbitrarily "large", which is a worst case scenario. 1753 1500 * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. 1754 1501 * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. 1755 1502 * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. 1756 1503 * Note : CStream size estimation is only correct for single-threaded compression. 1757 - * ZSTD_DStream memory budget depends on window Size. 1504 + * ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. 1505 + * Note 2 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time. 1506 + * Size estimates assume that no external sequence producer is registered. 
1507 + * 1508 + * ZSTD_DStream memory budget depends on frame's window Size. 1758 1509 * This information can be passed manually, using ZSTD_estimateDStreamSize, 1759 1510 * or deduced from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); 1511 + * Any frame requesting a window size larger than max specified one will be rejected. 1760 1512 * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), 1761 1513 * an internal ?Dict will be created, which additional size is not estimated here. 1762 - * In this case, get total size by adding ZSTD_estimate?DictSize */ 1763 - ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int compressionLevel); 1514 + * In this case, get total size by adding ZSTD_estimate?DictSize 1515 + */ 1516 + ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int maxCompressionLevel); 1764 1517 ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); 1765 1518 ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); 1766 - ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t windowSize); 1519 + ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t maxWindowSize); 1767 1520 ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); 1768 1521 1769 1522 /*!
ZSTD_estimate?DictSize() : ··· 1837 1568 typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; 1838 1569 static 1839 1570 __attribute__((__unused__)) 1571 + 1572 + #if defined(__clang__) && __clang_major__ >= 5 1573 + #pragma clang diagnostic push 1574 + #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" 1575 + #endif 1840 1576 ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /*< this constant defers to stdlib's functions */ 1577 + #if defined(__clang__) && __clang_major__ >= 5 1578 + #pragma clang diagnostic pop 1579 + #endif 1841 1580 1842 1581 ZSTDLIB_STATIC_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); 1843 1582 ZSTDLIB_STATIC_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); ··· 1926 1649 * This function never fails (wide contract) */ 1927 1650 ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); 1928 1651 1652 + /*! ZSTD_CCtx_setCParams() : 1653 + * Set all parameters provided within @p cparams into the working @p cctx. 1654 + * Note : if modifying parameters during compression (MT mode only), 1655 + * note that changes to the .windowLog parameter will be ignored. 1656 + * @return 0 on success, or an error code (can be checked with ZSTD_isError()). 1657 + * On failure, no parameters are updated. 1658 + */ 1659 + ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams); 1660 + 1661 + /*! ZSTD_CCtx_setFParams() : 1662 + * Set all parameters provided within @p fparams into the working @p cctx. 1663 + * @return 0 on success, or an error code (can be checked with ZSTD_isError()). 1664 + */ 1665 + ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams); 1666 + 1667 + /*! 
ZSTD_CCtx_setParams() : 1668 + * Set all parameters provided within @p params into the working @p cctx. 1669 + * @return 0 on success, or an error code (can be checked with ZSTD_isError()). 1670 + */ 1671 + ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params); 1672 + 1929 1673 /*! ZSTD_compress_advanced() : 1930 1674 * Note : this function is now DEPRECATED. 1931 1675 * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. 1932 1676 * This prototype will generate compilation warnings. */ 1933 1677 ZSTD_DEPRECATED("use ZSTD_compress2") 1678 + ZSTDLIB_STATIC_API 1934 1679 size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, 1935 - void* dst, size_t dstCapacity, 1936 - const void* src, size_t srcSize, 1937 - const void* dict,size_t dictSize, 1938 - ZSTD_parameters params); 1680 + void* dst, size_t dstCapacity, 1681 + const void* src, size_t srcSize, 1682 + const void* dict,size_t dictSize, 1683 + ZSTD_parameters params); 1939 1684 1940 1685 /*! ZSTD_compress_usingCDict_advanced() : 1941 1686 * Note : this function is now DEPRECATED. 1942 1687 * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. 1943 1688 * This prototype will generate compilation warnings. */ 1944 1689 ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary") 1690 + ZSTDLIB_STATIC_API 1945 1691 size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, 1946 1692 void* dst, size_t dstCapacity, 1947 1693 const void* src, size_t srcSize, ··· 2025 1725 * See the comments on that enum for an explanation of the feature. */ 2026 1726 #define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 2027 1727 2028 - /* Controlled with ZSTD_paramSwitch_e enum. 1728 + /* Controlled with ZSTD_ParamSwitch_e enum. 2029 1729 * Default is ZSTD_ps_auto. 2030 1730 * Set to ZSTD_ps_disable to never compress literals. 
2031 1731 * Set to ZSTD_ps_enable to always compress literals. (Note: uncompressed literals ··· 2036 1736 * negative compression levels do not use literal compression. 2037 1737 */ 2038 1738 #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 2039 - 2040 - /* Tries to fit compressed block size to be around targetCBlockSize. 2041 - * No target when targetCBlockSize == 0. 2042 - * There is no guarantee on compressed block size (default:0) */ 2043 - #define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 2044 1739 2045 1740 /* User's best guess of source size. 2046 1741 * Hint is not valid when srcSizeHint == 0. ··· 2103 1808 * Experimental parameter. 2104 1809 * Default is 0 == disabled. Set to 1 to enable. 2105 1810 * 2106 - * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same 2107 - * between calls, except for the modifications that zstd makes to pos (the 2108 - * caller must not modify pos). This is checked by the compressor, and 2109 - * compression will fail if it ever changes. This means the only flush 2110 - * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end 2111 - * is not used. The data in the ZSTD_inBuffer in the range [src, src + pos) 2112 - * MUST not be modified during compression or you will get data corruption. 1811 + * Tells the compressor that input data presented with ZSTD_inBuffer 1812 + * will ALWAYS be the same between calls. 1813 + * Technically, the @src pointer must never be changed, 1814 + * and the @pos field can only be updated by zstd. 1815 + * However, it's possible to increase the @size field, 1816 + * allowing scenarios where more data can be appended after compression starts. 1817 + * These conditions are checked by the compressor, 1818 + * and compression will fail if they are not respected. 1819 + * Also, data in the ZSTD_inBuffer within the range [src, src + pos) 1820 + * MUST not be modified during compression or it will result in data corruption.
2113 1821 * 2114 1822 * When this flag is enabled zstd won't allocate an input window buffer, 2115 1823 * because the user guarantees it can reference the ZSTD_inBuffer until ··· 2120 1822 * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also 2121 1823 * avoid the memcpy() from the input buffer to the input window buffer. 2122 1824 * 2123 - * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used. 2124 - * That means this flag cannot be used with ZSTD_compressStream(). 2125 - * 2126 1825 * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using 2127 1826 * this flag is ALWAYS memory safe, and will never access out-of-bounds 2128 - * memory. However, compression WILL fail if you violate the preconditions. 1827 + * memory. However, compression WILL fail if conditions are not respected. 2129 1828 * 2130 - * WARNING: The data in the ZSTD_inBuffer in the range [dst, dst + pos) MUST 2131 - * not be modified during compression or you will get data corruption. This 2132 - * is because zstd needs to reference data in the ZSTD_inBuffer to find 1829 + * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST 1830 + * not be modified during compression or it will result in data corruption. 1831 + * This is because zstd needs to reference data in the ZSTD_inBuffer to find 2133 1832 * matches. Normally zstd maintains its own window buffer for this purpose, 2134 - * but passing this flag tells zstd to use the user provided buffer. 1833 + * but passing this flag tells zstd to rely on user provided buffer instead. 2135 1834 */ 2136 1835 #define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9 2137 1836 ··· 2166 1871 /* ZSTD_c_validateSequences 2167 1872 * Default is 0 == disabled. Set to 1 to enable sequence validation. 2168 1873 * 2169 - * For use with sequence compression API: ZSTD_compressSequences(). 
2170 - * Designates whether or not we validate sequences provided to ZSTD_compressSequences() 1874 + * For use with sequence compression API: ZSTD_compressSequences*(). 1875 + * Designates whether or not provided sequences are validated within ZSTD_compressSequences*() 2171 1876 * during function execution. 2172 1877 * 2173 - * Without validation, providing a sequence that does not conform to the zstd spec will cause 2174 - * undefined behavior, and may produce a corrupted block. 1878 + * When Sequence validation is disabled (default), Sequences are compressed as-is, 1879 + * so they must be correct, otherwise it would result in a corruption error. 2175 1880 * 2176 - * With validation enabled, a if sequence is invalid (see doc/zstd_compression_format.md for 1881 + * Sequence validation adds some protection, by ensuring that all values respect boundary conditions. 1882 + * If a Sequence is detected invalid (see doc/zstd_compression_format.md for 2177 1883 * specifics regarding offset/matchlength requirements) then the function will bail out and 2178 1884 * return an error. 2179 - * 2180 1885 */ 2181 1886 #define ZSTD_c_validateSequences ZSTD_c_experimentalParam12 2182 1887 2183 - /* ZSTD_c_useBlockSplitter 2184 - * Controlled with ZSTD_paramSwitch_e enum. 1888 + /* ZSTD_c_blockSplitterLevel 1889 + * note: this parameter only influences the first splitter stage, 1890 + * which is active before producing the sequences. 1891 + * ZSTD_c_splitAfterSequences controls the next splitter stage, 1892 + * which is active after sequence production. 1893 + * Note that both can be combined. 1894 + * Allowed values are between 0 and ZSTD_BLOCKSPLITTER_LEVEL_MAX included. 1895 + * 0 means "auto", which will select a value depending on current ZSTD_c_strategy. 1896 + * 1 means no splitting. 1897 + * Then, values from 2 to 6 are sorted in increasing cpu load order.
1898 + * 1899 + * Note that currently the first block is never split, 1900 + * to ensure expansion guarantees in presence of incompressible data. 1901 + */ 1902 + #define ZSTD_BLOCKSPLITTER_LEVEL_MAX 6 1903 + #define ZSTD_c_blockSplitterLevel ZSTD_c_experimentalParam20 1904 + 1905 + /* ZSTD_c_splitAfterSequences 1906 + * This is a stronger splitter algorithm, 1907 + * based on actual sequences previously produced by the selected parser. 1908 + * It's also slower, and as a consequence, mostly used for high compression levels. 1909 + * While the post-splitter does overlap with the pre-splitter, 1910 + * both can nonetheless be combined, 1911 + * notably with ZSTD_c_blockSplitterLevel at ZSTD_BLOCKSPLITTER_LEVEL_MAX, 1912 + * resulting in higher compression ratio than just one of them. 1913 + * 2185 1914 * Default is ZSTD_ps_auto. 2186 1915 * Set to ZSTD_ps_disable to never use block splitter. 2187 1916 * Set to ZSTD_ps_enable to always use block splitter. ··· 2213 1894 * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use 2214 1895 * block splitting based on the compression parameters. 2215 1896 */ 2216 - #define ZSTD_c_useBlockSplitter ZSTD_c_experimentalParam13 1897 + #define ZSTD_c_splitAfterSequences ZSTD_c_experimentalParam13 2217 1898 2218 1899 /* ZSTD_c_useRowMatchFinder 2219 - * Controlled with ZSTD_paramSwitch_e enum. 1900 + * Controlled with ZSTD_ParamSwitch_e enum. 2220 1901 * Default is ZSTD_ps_auto. 2221 1902 * Set to ZSTD_ps_disable to never use row-based matchfinder. 2222 1903 * Set to ZSTD_ps_enable to force usage of row-based matchfinder. ··· 2246 1927 * cost to memcpy() the data. 2247 1928 */ 2248 1929 #define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15 1930 + 1931 + /* ZSTD_c_prefetchCDictTables 1932 + * Controlled with ZSTD_ParamSwitch_e enum. Default is ZSTD_ps_auto. 1933 + * 1934 + * In some situations, zstd uses CDict tables in-place rather than copying them 1935 + * into the working context. 
(See docs on ZSTD_dictAttachPref_e above for details). 1936 + * In such situations, compression speed is seriously impacted when CDict tables are 1937 + * "cold" (outside CPU cache). This parameter instructs zstd to prefetch CDict tables 1938 + * when they are used in-place. 1939 + * 1940 + * For sufficiently small inputs, the cost of the prefetch will outweigh the benefit. 1941 + * For sufficiently large inputs, zstd will by default memcpy() CDict tables 1942 + * into the working context, so there is no need to prefetch. This parameter is 1943 + * targeted at a middle range of input sizes, where a prefetch is cheap enough to be 1944 + * useful but memcpy() is too expensive. The exact range of input sizes where this 1945 + * makes sense is best determined by careful experimentation. 1946 + * 1947 + * Note: for this parameter, ZSTD_ps_auto is currently equivalent to ZSTD_ps_disable, 1948 + * but in the future zstd may conditionally enable this feature via an auto-detection 1949 + * heuristic for cold CDicts. 1950 + * Use ZSTD_ps_disable to opt out of prefetching under any circumstances. 1951 + */ 1952 + #define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16 1953 + 1954 + /* ZSTD_c_enableSeqProducerFallback 1955 + * Allowed values are 0 (disable) and 1 (enable). The default setting is 0. 1956 + * 1957 + * Controls whether zstd will fall back to an internal sequence producer if an 1958 + * external sequence producer is registered and returns an error code. This fallback 1959 + * is block-by-block: the internal sequence producer will only be called for blocks 1960 + * where the external sequence producer returns an error code. Fallback parsing will 1961 + * follow any other cParam settings, such as compression level, the same as in a 1962 + * normal (fully-internal) compression operation. 1963 + * 1964 + * The user is strongly encouraged to read the full Block-Level Sequence Producer API 1965 + * documentation (below) before setting this parameter. 
*/ 1966 + #define ZSTD_c_enableSeqProducerFallback ZSTD_c_experimentalParam17 1967 + 1968 + /* ZSTD_c_maxBlockSize 1969 + * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB). 1970 + * The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default. 1971 + * 1972 + * This parameter can be used to set an upper bound on the blocksize 1973 + * that overrides the default ZSTD_BLOCKSIZE_MAX. It cannot be used to set upper 1974 + * bounds greater than ZSTD_BLOCKSIZE_MAX or bounds lower than 1KB (will make 1975 + * compressBound() inaccurate). Only currently meant to be used for testing. 1976 + */ 1977 + #define ZSTD_c_maxBlockSize ZSTD_c_experimentalParam18 1978 + 1979 + /* ZSTD_c_repcodeResolution 1980 + * This parameter only has an effect if ZSTD_c_blockDelimiters is 1981 + * set to ZSTD_sf_explicitBlockDelimiters (may change in the future). 1982 + * 1983 + * This parameter affects how zstd parses external sequences, 1984 + * provided via the ZSTD_compressSequences*() API 1985 + * or from an external block-level sequence producer. 1986 + * 1987 + * If set to ZSTD_ps_enable, the library will check for repeated offsets within 1988 + * external sequences, even if those repcodes are not explicitly indicated in 1989 + * the "rep" field. Note that this is the only way to exploit repcode matches 1990 + * while using compressSequences*() or an external sequence producer, since zstd 1991 + * currently ignores the "rep" field of external sequences. 1992 + * 1993 + * If set to ZSTD_ps_disable, the library will not exploit repeated offsets in 1994 + * external sequences, regardless of whether the "rep" field has been set. This 1995 + * reduces sequence compression overhead by about 25% while sacrificing some 1996 + * compression ratio. 1997 + * 1998 + * The default value is ZSTD_ps_auto, for which the library will enable/disable 1999 + * based on compression level (currently: level<10 disables, level>=10 enables). 
2000 + */ 2001 + #define ZSTD_c_repcodeResolution ZSTD_c_experimentalParam19 2002 + #define ZSTD_c_searchForExternalRepcodes ZSTD_c_experimentalParam19 /* older name */ 2003 + 2249 2004 2250 2005 /*! ZSTD_CCtx_getParameter() : 2251 2006 * Get the requested compression parameter value, selected by enum ZSTD_cParameter, ··· 2477 2084 * in the range [dst, dst + pos) MUST not be modified during decompression 2478 2085 * or you will get data corruption. 2479 2086 * 2480 - * When this flags is enabled zstd won't allocate an output buffer, because 2087 + * When this flag is enabled zstd won't allocate an output buffer, because 2481 2088 * it can write directly to the ZSTD_outBuffer, but it will still allocate 2482 2089 * an input buffer large enough to fit any compressed block. This will also 2483 2090 * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. ··· 2530 2137 */ 2531 2138 #define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4 2532 2139 2140 + /* ZSTD_d_disableHuffmanAssembly 2141 + * Set to 1 to disable the Huffman assembly implementation. 2142 + * The default value is 0, which allows zstd to use the Huffman assembly 2143 + * implementation if available. 2144 + * 2145 + * This parameter can be used to disable Huffman assembly at runtime. 2146 + * If you want to disable it at compile time you can define the macro 2147 + * ZSTD_DISABLE_ASM. 2148 + */ 2149 + #define ZSTD_d_disableHuffmanAssembly ZSTD_d_experimentalParam5 2150 + 2151 + /* ZSTD_d_maxBlockSize 2152 + * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB). 2153 + * The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default. 2154 + * 2155 + * Forces the decompressor to reject blocks whose content size is 2156 + * larger than the configured maxBlockSize. When maxBlockSize is 2157 + * larger than the windowSize, the windowSize is used instead. 2158 + * This saves memory on the decoder when you know all blocks are small. 
2159 + * 2160 + * This option is typically used in conjunction with ZSTD_c_maxBlockSize. 2161 + * 2162 + * WARNING: This causes the decoder to reject otherwise valid frames 2163 + * that have block sizes larger than the configured maxBlockSize. 2164 + */ 2165 + #define ZSTD_d_maxBlockSize ZSTD_d_experimentalParam6 2166 + 2533 2167 2534 2168 /*! ZSTD_DCtx_setFormat() : 2535 2169 * This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter(). ··· 2565 2145 * such ZSTD_f_zstd1_magicless for example. 2566 2146 * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ 2567 2147 ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead") 2148 + ZSTDLIB_STATIC_API 2568 2149 size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); 2569 2150 2570 2151 /*! ZSTD_decompressStream_simpleArgs() : ··· 2602 2181 * This prototype will generate compilation warnings. 2603 2182 */ 2604 2183 ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") 2184 + ZSTDLIB_STATIC_API 2605 2185 size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, 2606 2186 int compressionLevel, 2607 2187 unsigned long long pledgedSrcSize); ··· 2620 2198 * This prototype will generate compilation warnings. 2621 2199 */ 2622 2200 ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") 2201 + ZSTDLIB_STATIC_API 2623 2202 size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, 2624 2203 const void* dict, size_t dictSize, 2625 2204 int compressionLevel); 2626 2205 2627 2206 /*! ZSTD_initCStream_advanced() : 2628 - * This function is DEPRECATED, and is approximately equivalent to: 2207 + * This function is DEPRECATED, and is equivalent to: 2629 2208 * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); 2630 - * // Pseudocode: Set each zstd parameter and leave the rest as-is. 
2631 - * for ((param, value) : params) { 2632 - * ZSTD_CCtx_setParameter(zcs, param, value); 2633 - * } 2209 + * ZSTD_CCtx_setParams(zcs, params); 2634 2210 * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); 2635 2211 * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); 2636 2212 * ··· 2638 2218 * This prototype will generate compilation warnings. 2639 2219 */ 2640 2220 ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") 2221 + ZSTDLIB_STATIC_API 2641 2222 size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, 2642 2223 const void* dict, size_t dictSize, 2643 2224 ZSTD_parameters params, ··· 2653 2232 * This prototype will generate compilation warnings. 2654 2233 */ 2655 2234 ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") 2235 + ZSTDLIB_STATIC_API 2656 2236 size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); 2657 2237 2658 2238 /*! ZSTD_initCStream_usingCDict_advanced() : 2659 - * This function is DEPRECATED, and is approximately equivalent to: 2239 + * This function is DEPRECATED, and is equivalent to: 2660 2240 * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); 2661 - * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. 2662 - * for ((fParam, value) : fParams) { 2663 - * ZSTD_CCtx_setParameter(zcs, fParam, value); 2664 - * } 2241 + * ZSTD_CCtx_setFParams(zcs, fParams); 2665 2242 * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); 2666 2243 * ZSTD_CCtx_refCDict(zcs, cdict); 2667 2244 * ··· 2669 2250 * This prototype will generate compilation warnings. 2670 2251 */ 2671 2252 ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") 2253 + ZSTDLIB_STATIC_API 2672 2254 size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, 2673 2255 const ZSTD_CDict* cdict, 2674 2256 ZSTD_frameParameters fParams, ··· 2684 2264 * explicitly specified. 
2685 2265 * 2686 2266 * start a new frame, using same parameters from previous frame. 2687 - * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. 2267 + * This is typically useful to skip dictionary loading stage, since it will reuse it in-place. 2688 2268 * Note that zcs must be init at least once before using ZSTD_resetCStream(). 2689 2269 * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. 2690 2270 * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. ··· 2694 2274 * This prototype will generate compilation warnings. 2695 2275 */ 2696 2276 ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") 2277 + ZSTDLIB_STATIC_API 2697 2278 size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); 2698 2279 2699 2280 ··· 2740 2319 * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); 2741 2320 * 2742 2321 * note: no dictionary will be used if dict == NULL or dictSize < 8 2743 - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x 2744 2322 */ 2323 + ZSTD_DEPRECATED("use ZSTD_DCtx_reset + ZSTD_DCtx_loadDictionary, see zstd.h for detailed instructions") 2745 2324 ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); 2746 2325 2747 2326 /*! ··· 2751 2330 * ZSTD_DCtx_refDDict(zds, ddict); 2752 2331 * 2753 2332 * note : ddict is referenced, it must outlive decompression session 2754 - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x 2755 2333 */ 2334 + ZSTD_DEPRECATED("use ZSTD_DCtx_reset + ZSTD_DCtx_refDDict, see zstd.h for detailed instructions") 2756 2335 ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); 2757 2336 2758 2337 /*! 
··· 2760 2339 * 2761 2340 * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); 2762 2341 * 2763 - * re-use decompression parameters from previous init; saves dictionary loading 2764 - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x 2342 + * reuse decompression parameters from previous init; saves dictionary loading 2765 2343 */ 2344 + ZSTD_DEPRECATED("use ZSTD_DCtx_reset, see zstd.h for detailed instructions") 2766 2345 ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); 2767 2346 2768 2347 2348 + /* ********************* BLOCK-LEVEL SEQUENCE PRODUCER API ********************* 2349 + * 2350 + * *** OVERVIEW *** 2351 + * The Block-Level Sequence Producer API allows users to provide their own custom 2352 + * sequence producer which libzstd invokes to process each block. The produced list 2353 + * of sequences (literals and matches) is then post-processed by libzstd to produce 2354 + * valid compressed blocks. 2355 + * 2356 + * This block-level offload API is a more granular complement of the existing 2357 + * frame-level offload API compressSequences() (introduced in v1.5.1). It offers 2358 + * an easier migration story for applications already integrated with libzstd: the 2359 + * user application continues to invoke the same compression functions 2360 + * ZSTD_compress2() or ZSTD_compressStream2() as usual, and transparently benefits 2361 + * from the specific advantages of the external sequence producer. For example, 2362 + * the sequence producer could be tuned to take advantage of known characteristics 2363 + * of the input, to offer better speed / ratio, or could leverage hardware 2364 + * acceleration not available within libzstd itself. 2365 + * 2366 + * See contrib/externalSequenceProducer for an example program employing the 2367 + * Block-Level Sequence Producer API. 
2368 + * 2369 + * *** USAGE *** 2370 + * The user is responsible for implementing a function of type 2371 + * ZSTD_sequenceProducer_F. For each block, zstd will pass the following 2372 + * arguments to the user-provided function: 2373 + * 2374 + * - sequenceProducerState: a pointer to a user-managed state for the sequence 2375 + * producer. 2376 + * 2377 + * - outSeqs, outSeqsCapacity: an output buffer for the sequence producer. 2378 + * outSeqsCapacity is guaranteed >= ZSTD_sequenceBound(srcSize). The memory 2379 + * backing outSeqs is managed by the CCtx. 2380 + * 2381 + * - src, srcSize: an input buffer for the sequence producer to parse. 2382 + * srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX. 2383 + * 2384 + * - dict, dictSize: a history buffer, which may be empty, which the sequence 2385 + * producer may reference as it parses the src buffer. Currently, zstd will 2386 + * always pass dictSize == 0 into external sequence producers, but this will 2387 + * change in the future. 2388 + * 2389 + * - compressionLevel: a signed integer representing the zstd compression level 2390 + * set by the user for the current operation. The sequence producer may choose 2391 + * to use this information to change its compression strategy and speed/ratio 2392 + * tradeoff. Note: the compression level does not reflect zstd parameters set 2393 + * through the advanced API. 2394 + * 2395 + * - windowSize: a size_t representing the maximum allowed offset for external 2396 + * sequences. Note that sequence offsets are sometimes allowed to exceed the 2397 + * windowSize if a dictionary is present, see doc/zstd_compression_format.md 2398 + * for details. 2399 + * 2400 + * The user-provided function shall return a size_t representing the number of 2401 + * sequences written to outSeqs. This return value will be treated as an error 2402 + * code if it is greater than outSeqsCapacity. The return value must be non-zero 2403 + * if srcSize is non-zero. 
The ZSTD_SEQUENCE_PRODUCER_ERROR macro is provided 2404 + * for convenience, but any value greater than outSeqsCapacity will be treated as 2405 + * an error code. 2406 + * 2407 + * If the user-provided function does not return an error code, the sequences 2408 + * written to outSeqs must be a valid parse of the src buffer. Data corruption may 2409 + * occur if the parse is not valid. A parse is defined to be valid if the 2410 + * following conditions hold: 2411 + * - The sum of matchLengths and literalLengths must equal srcSize. 2412 + * - All sequences in the parse, except for the final sequence, must have 2413 + * matchLength >= ZSTD_MINMATCH_MIN. The final sequence must have 2414 + * matchLength >= ZSTD_MINMATCH_MIN or matchLength == 0. 2415 + * - All offsets must respect the windowSize parameter as specified in 2416 + * doc/zstd_compression_format.md. 2417 + * - If the final sequence has matchLength == 0, it must also have offset == 0. 2418 + * 2419 + * zstd will only validate these conditions (and fail compression if they do not 2420 + * hold) if the ZSTD_c_validateSequences cParam is enabled. Note that sequence 2421 + * validation has a performance cost. 2422 + * 2423 + * If the user-provided function returns an error, zstd will either fall back 2424 + * to an internal sequence producer or fail the compression operation. The user can 2425 + * choose between the two behaviors by setting the ZSTD_c_enableSeqProducerFallback 2426 + * cParam. Fallback compression will follow any other cParam settings, such as 2427 + * compression level, the same as in a normal compression operation. 2428 + * 2429 + * The user shall instruct zstd to use a particular ZSTD_sequenceProducer_F 2430 + * function by calling 2431 + * ZSTD_registerSequenceProducer(cctx, 2432 + * sequenceProducerState, 2433 + * sequenceProducer) 2434 + * This setting will persist until the next parameter reset of the CCtx. 
2435 + * 2436 + * The sequenceProducerState must be initialized by the user before calling 2437 + * ZSTD_registerSequenceProducer(). The user is responsible for destroying the 2438 + * sequenceProducerState. 2439 + * 2440 + * *** LIMITATIONS *** 2441 + * This API is compatible with all zstd compression APIs which respect advanced parameters. 2442 + * However, there are three limitations: 2443 + * 2444 + * First, the ZSTD_c_enableLongDistanceMatching cParam is not currently supported. 2445 + * COMPRESSION WILL FAIL if it is enabled and the user tries to compress with a block-level 2446 + * external sequence producer. 2447 + * - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in some 2448 + * cases (see its documentation for details). Users must explicitly set 2449 + * ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an external 2450 + * sequence producer is registered. 2451 + * - As of this writing, ZSTD_c_enableLongDistanceMatching is disabled by default 2452 + * whenever ZSTD_c_windowLog < 27 (i.e. a window smaller than 128MB), but that's subject to change. Users should 2453 + * check the docs on ZSTD_c_enableLongDistanceMatching whenever the Block-Level Sequence 2454 + * Producer API is used in conjunction with advanced settings (like ZSTD_c_windowLog). 2455 + * 2456 + * Second, history buffers are not currently supported. Concretely, zstd will always pass 2457 + * dictSize == 0 to the external sequence producer (for now). This has two implications: 2458 + * - Dictionaries are not currently supported. Compression will *not* fail if the user 2459 + * references a dictionary, but the dictionary won't have any effect. 2460 + * - Stream history is not currently supported. All advanced compression APIs, including 2461 + * streaming APIs, work with external sequence producers, but each block is treated as 2462 + * an independent chunk without history from previous blocks. 
2463 + * 2464 + * Third, multi-threading within a single compression is not currently supported. In other words, 2465 + * COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external sequence producer is registered. 2466 + * Multi-threading across compressions is fine: simply create one CCtx per thread. 2467 + * 2468 + * Long-term, we plan to overcome all three limitations. There is no technical blocker to 2469 + * overcoming them. It is purely a question of engineering effort. 2470 + */ 2471 + 2472 + #define ZSTD_SEQUENCE_PRODUCER_ERROR ((size_t)(-1)) 2473 + 2474 + typedef size_t (*ZSTD_sequenceProducer_F) ( 2475 + void* sequenceProducerState, 2476 + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, 2477 + const void* src, size_t srcSize, 2478 + const void* dict, size_t dictSize, 2479 + int compressionLevel, 2480 + size_t windowSize 2481 + ); 2482 + 2483 + /*! ZSTD_registerSequenceProducer() : 2484 + * Instruct zstd to use a block-level external sequence producer function. 2485 + * 2486 + * The sequenceProducerState must be initialized by the caller, and the caller is 2487 + * responsible for managing its lifetime. This parameter is sticky across 2488 + * compressions. It will remain set until the user explicitly resets compression 2489 + * parameters. 2490 + * 2491 + * Sequence producer registration is considered to be an "advanced parameter", 2492 + * part of the "advanced API". This means it will only have an effect on compression 2493 + * APIs which respect advanced parameters, such as compress2() and compressStream2(). 2494 + * Older compression APIs such as compressCCtx(), which predate the introduction of 2495 + * "advanced parameters", will ignore any external sequence producer setting. 2496 + * 2497 + * The sequence producer can be "cleared" by registering a NULL function pointer. This 2498 + * removes all limitations described above in the "LIMITATIONS" section of the API docs. 
2499 + * 2500 + * The user is strongly encouraged to read the full API documentation (above) before 2501 + * calling this function. */ 2502 + ZSTDLIB_STATIC_API void 2503 + ZSTD_registerSequenceProducer( 2504 + ZSTD_CCtx* cctx, 2505 + void* sequenceProducerState, 2506 + ZSTD_sequenceProducer_F sequenceProducer 2507 + ); 2508 + 2509 + /*! ZSTD_CCtxParams_registerSequenceProducer() : 2510 + * Same as ZSTD_registerSequenceProducer(), but operates on ZSTD_CCtx_params. 2511 + * This is used for accurate size estimation with ZSTD_estimateCCtxSize_usingCCtxParams(), 2512 + * which is needed when creating a ZSTD_CCtx with ZSTD_initStaticCCtx(). 2513 + * 2514 + * If you are using the external sequence producer API in a scenario where ZSTD_initStaticCCtx() 2515 + * is required, then this function is for you. Otherwise, you probably don't need it. 2516 + * 2517 + * See tests/zstreamtest.c for example usage. */ 2518 + ZSTDLIB_STATIC_API void 2519 + ZSTD_CCtxParams_registerSequenceProducer( 2520 + ZSTD_CCtx_params* params, 2521 + void* sequenceProducerState, 2522 + ZSTD_sequenceProducer_F sequenceProducer 2523 + ); 2524 + 2525 + 2769 2526 /* ******************************************************************* 2770 - * Buffer-less and synchronous inner streaming functions 2527 + * Buffer-less and synchronous inner streaming functions (DEPRECATED) 2771 2528 * 2772 - * This is an advanced API, giving full control over buffer management, for users which need direct control over memory. 2773 - * But it's also a complex one, with several restrictions, documented below. 2774 - * Prefer normal streaming API for an easier experience. 2529 + * This API is deprecated, and will be removed in a future version. 2530 + * It allows streaming (de)compression with user allocated buffers. 2531 + * However, it is hard to use, and not as well tested as the rest of 2532 + * our API. 
2533 + * 2534 + * Please use the normal streaming API instead: ZSTD_compressStream2, 2535 + * and ZSTD_decompressStream. 2536 + * If there is functionality that you need, but it doesn't provide, 2537 + * please open an issue on our GitHub. 2775 2538 ********************************************************************* */ 2776 2539 2777 2540 /* ··· 2963 2358 2964 2359 A ZSTD_CCtx object is required to track streaming operations. 2965 2360 Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. 2966 - ZSTD_CCtx object can be re-used multiple times within successive compression operations. 2361 + ZSTD_CCtx object can be reused multiple times within successive compression operations. 2967 2362 2968 2363 Start by initializing a context. 2969 2364 Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression. 2970 - It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() 2971 2365 2972 2366 Then, consume your input using ZSTD_compressContinue(). 2973 2367 There are some important considerations to keep in mind when using this advanced function : ··· 2984 2380 It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. 2985 2381 Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. 2986 2382 2987 - `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. 2383 + `ZSTD_CCtx` object can be reused (ZSTD_compressBegin()) to compress again. 2988 2384 */ 2989 2385 2990 2386 /*===== Buffer-less streaming compression functions =====*/ 2387 + ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") 2991 2388 ZSTDLIB_STATIC_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); 2389 + ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. 
See docs.") 2992 2390 ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); 2391 + ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") 2993 2392 ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*< note: fails if cdict==NULL */ 2994 - ZSTDLIB_STATIC_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ 2995 2393 2394 + ZSTD_DEPRECATED("This function will likely be removed in a future release. It is misleading and has very limited utility.") 2395 + ZSTDLIB_STATIC_API 2396 + size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ 2397 + 2398 + ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") 2996 2399 ZSTDLIB_STATIC_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); 2400 + ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. 
See docs.") 2997 2401 ZSTDLIB_STATIC_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); 2998 2402 2999 2403 /* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */ 3000 2404 ZSTD_DEPRECATED("use advanced API to access custom parameters") 2405 + ZSTDLIB_STATIC_API 3001 2406 size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /*< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ 3002 2407 ZSTD_DEPRECATED("use advanced API to access custom parameters") 2408 + ZSTDLIB_STATIC_API 3003 2409 size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ 3004 2410 /* 3005 2411 Buffer-less streaming decompression (synchronous mode) 3006 2412 3007 2413 A ZSTD_DCtx object is required to track streaming operations. 3008 2414 Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. 3009 - A ZSTD_DCtx object can be re-used multiple times. 2415 + A ZSTD_DCtx object can be reused multiple times. 3010 2416 3011 2417 First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). 3012 2418 Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. 3013 2419 Data fragment must be large enough to ensure successful decoding. 3014 2420 `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. 3015 - @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. 
3016 - >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. 2421 + result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. 2422 + >0 : `srcSize` is too small, please provide at least result bytes on next attempt. 3017 2423 errorCode, which can be tested using ZSTD_isError(). 3018 2424 3019 - It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, 2425 + It fills a ZSTD_FrameHeader structure with important information to correctly decode the frame, 3020 2426 such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). 3021 2427 Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. 3022 2428 As a consequence, check that values remain within valid application range. ··· 3042 2428 3043 2429 The most memory efficient way is to use a round buffer of sufficient size. 3044 2430 Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), 3045 - which can @return an error code if required value is too large for current system (in 32-bits mode). 2431 + which can return an error code if required value is too large for current system (in 32-bits mode). 3046 2432 In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, 3047 2433 up to the moment there is not enough room left in the buffer to guarantee decoding another full block, 3048 2434 which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. ··· 3062 2448 ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). 3063 2449 ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. 3064 2450 3065 - @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). 
2451 + result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). 3066 2452 It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. 3067 2453 It can also be an error code, which can be tested with ZSTD_isError(). 3068 2454 ··· 3085 2471 */ 3086 2472 3087 2473 /*===== Buffer-less streaming decompression functions =====*/ 3088 - typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; 3089 - typedef struct { 3090 - unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ 3091 - unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ 3092 - unsigned blockSizeMax; 3093 - ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ 3094 - unsigned headerSize; 3095 - unsigned dictID; 3096 - unsigned checksumFlag; 3097 - } ZSTD_frameHeader; 3098 2474 3099 - /*! ZSTD_getFrameHeader() : 3100 - * decode Frame Header, or requires larger `srcSize`. 3101 - * @return : 0, `zfhPtr` is correctly filled, 3102 - * >0, `srcSize` is too small, value is wanted `srcSize` amount, 3103 - * or an error code, which can be tested using ZSTD_isError() */ 3104 - ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /*< doesn't consume input */ 3105 - /*! 
ZSTD_getFrameHeader_advanced() : 3106 - * same as ZSTD_getFrameHeader(), 3107 - * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ 3108 - ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); 3109 2475 ZSTDLIB_STATIC_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /*< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ 3110 2476 3111 2477 ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); ··· 3096 2502 ZSTDLIB_STATIC_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); 3097 2503 3098 2504 /* misc */ 2505 + ZSTD_DEPRECATED("This function will likely be removed in the next minor release. It is misleading and has very limited utility.") 3099 2506 ZSTDLIB_STATIC_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); 3100 2507 typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; 3101 2508 ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); ··· 3104 2509 3105 2510 3106 2511 3107 - /* ============================ */ 3108 - /* Block level API */ 3109 - /* ============================ */ 2512 + /* ========================================= */ 2513 + /* Block level API (DEPRECATED) */ 2514 + /* ========================================= */ 3110 2515 3111 2516 /*! 2517 + 2518 + This API is deprecated in favor of the regular compression API. 2519 + You can get the frame header down to 2 bytes by setting: 2520 + - ZSTD_c_format = ZSTD_f_zstd1_magicless 2521 + - ZSTD_c_contentSizeFlag = 0 2522 + - ZSTD_c_checksumFlag = 0 2523 + - ZSTD_c_dictIDFlag = 0 2524 + 2525 + This API is not as well tested as our normal API, so we recommend not using it. 
2526 + We will be removing it in a future version. If the normal API doesn't provide 2527 + the functionality you need, please open a GitHub issue. 2528 + 3112 2529 Block functions produce and decode raw zstd blocks, without frame metadata. 3113 2530 Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). 3114 2531 But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. ··· 3131 2524 - It is necessary to init context before starting 3132 2525 + compression : any ZSTD_compressBegin*() variant, including with dictionary 3133 2526 + decompression : any ZSTD_decompressBegin*() variant, including with dictionary 3134 - + copyCCtx() and copyDCtx() can be used too 3135 2527 - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB 3136 2528 + If input is larger than a block size, it's necessary to split input data into multiple blocks 3137 2529 + For inputs larger than a single block, consider using regular ZSTD_compress() instead. ··· 3147 2541 */ 3148 2542 3149 2543 /*===== Raw zstd block functions =====*/ 2544 + ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.") 3150 2545 ZSTDLIB_STATIC_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); 2546 + ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.") 3151 2547 ZSTDLIB_STATIC_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); 2548 + ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.") 3152 2549 ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); 2550 + ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. 
See docs.") 3153 2551 ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /*< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ 3154 2552 3155 2553 3156 2554 #endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ 3157 -
+2 -1
lib/zstd/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 2 2 # ################################################################ 3 - # Copyright (c) Facebook, Inc. 3 + # Copyright (c) Meta Platforms, Inc. and affiliates. 4 4 # All rights reserved. 5 5 # 6 6 # This source code is licensed under both the BSD-style license (found in the ··· 26 26 compress/zstd_lazy.o \ 27 27 compress/zstd_ldm.o \ 28 28 compress/zstd_opt.o \ 29 + compress/zstd_preSplit.o \ 29 30 30 31 zstd_decompress-y := \ 31 32 zstd_decompress_module.o \
+56
lib/zstd/common/allocations.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 2 + /* 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 4 + * All rights reserved. 5 + * 6 + * This source code is licensed under both the BSD-style license (found in the 7 + * LICENSE file in the root directory of this source tree) and the GPLv2 (found 8 + * in the COPYING file in the root directory of this source tree). 9 + * You may select, at your option, one of the above-listed licenses. 10 + */ 11 + 12 + /* This file provides custom allocation primitives 13 + */ 14 + 15 + #define ZSTD_DEPS_NEED_MALLOC 16 + #include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */ 17 + 18 + #include "compiler.h" /* MEM_STATIC */ 19 + #define ZSTD_STATIC_LINKING_ONLY 20 + #include <linux/zstd.h> /* ZSTD_customMem */ 21 + 22 + #ifndef ZSTD_ALLOCATIONS_H 23 + #define ZSTD_ALLOCATIONS_H 24 + 25 + /* custom memory allocation functions */ 26 + 27 + MEM_STATIC void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) 28 + { 29 + if (customMem.customAlloc) 30 + return customMem.customAlloc(customMem.opaque, size); 31 + return ZSTD_malloc(size); 32 + } 33 + 34 + MEM_STATIC void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) 35 + { 36 + if (customMem.customAlloc) { 37 + /* calloc implemented as malloc+memset; 38 + * not as efficient as calloc, but next best guess for custom malloc */ 39 + void* const ptr = customMem.customAlloc(customMem.opaque, size); 40 + ZSTD_memset(ptr, 0, size); 41 + return ptr; 42 + } 43 + return ZSTD_calloc(1, size); 44 + } 45 + 46 + MEM_STATIC void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) 47 + { 48 + if (ptr!=NULL) { 49 + if (customMem.customFree) 50 + customMem.customFree(customMem.opaque, ptr); 51 + else 52 + ZSTD_free(ptr); 53 + } 54 + } 55 + 56 + #endif /* ZSTD_ALLOCATIONS_H */
+150
lib/zstd/common/bits.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 2 + /* 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 4 + * All rights reserved. 5 + * 6 + * This source code is licensed under both the BSD-style license (found in the 7 + * LICENSE file in the root directory of this source tree) and the GPLv2 (found 8 + * in the COPYING file in the root directory of this source tree). 9 + * You may select, at your option, one of the above-listed licenses. 10 + */ 11 + 12 + #ifndef ZSTD_BITS_H 13 + #define ZSTD_BITS_H 14 + 15 + #include "mem.h" 16 + 17 + MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val) 18 + { 19 + assert(val != 0); 20 + { 21 + static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3, 22 + 30, 22, 20, 15, 25, 17, 4, 8, 23 + 31, 27, 13, 23, 21, 19, 16, 7, 24 + 26, 12, 18, 6, 11, 5, 10, 9}; 25 + return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27]; 26 + } 27 + } 28 + 29 + MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val) 30 + { 31 + assert(val != 0); 32 + #if (__GNUC__ >= 4) 33 + return (unsigned)__builtin_ctz(val); 34 + #else 35 + return ZSTD_countTrailingZeros32_fallback(val); 36 + #endif 37 + } 38 + 39 + MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) 40 + { 41 + assert(val != 0); 42 + { 43 + static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29, 44 + 11, 14, 16, 18, 22, 25, 3, 30, 45 + 8, 12, 20, 28, 15, 17, 24, 7, 46 + 19, 27, 23, 6, 26, 5, 4, 31}; 47 + val |= val >> 1; 48 + val |= val >> 2; 49 + val |= val >> 4; 50 + val |= val >> 8; 51 + val |= val >> 16; 52 + return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27]; 53 + } 54 + } 55 + 56 + MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val) 57 + { 58 + assert(val != 0); 59 + #if (__GNUC__ >= 4) 60 + return (unsigned)__builtin_clz(val); 61 + #else 62 + return ZSTD_countLeadingZeros32_fallback(val); 63 + #endif 64 + } 65 + 66 + MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val) 67 + { 68 + assert(val != 0); 69 + #if 
(__GNUC__ >= 4) && defined(__LP64__) 70 + return (unsigned)__builtin_ctzll(val); 71 + #else 72 + { 73 + U32 mostSignificantWord = (U32)(val >> 32); 74 + U32 leastSignificantWord = (U32)val; 75 + if (leastSignificantWord == 0) { 76 + return 32 + ZSTD_countTrailingZeros32(mostSignificantWord); 77 + } else { 78 + return ZSTD_countTrailingZeros32(leastSignificantWord); 79 + } 80 + } 81 + #endif 82 + } 83 + 84 + MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val) 85 + { 86 + assert(val != 0); 87 + #if (__GNUC__ >= 4) 88 + return (unsigned)(__builtin_clzll(val)); 89 + #else 90 + { 91 + U32 mostSignificantWord = (U32)(val >> 32); 92 + U32 leastSignificantWord = (U32)val; 93 + if (mostSignificantWord == 0) { 94 + return 32 + ZSTD_countLeadingZeros32(leastSignificantWord); 95 + } else { 96 + return ZSTD_countLeadingZeros32(mostSignificantWord); 97 + } 98 + } 99 + #endif 100 + } 101 + 102 + MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val) 103 + { 104 + if (MEM_isLittleEndian()) { 105 + if (MEM_64bits()) { 106 + return ZSTD_countTrailingZeros64((U64)val) >> 3; 107 + } else { 108 + return ZSTD_countTrailingZeros32((U32)val) >> 3; 109 + } 110 + } else { /* Big Endian CPU */ 111 + if (MEM_64bits()) { 112 + return ZSTD_countLeadingZeros64((U64)val) >> 3; 113 + } else { 114 + return ZSTD_countLeadingZeros32((U32)val) >> 3; 115 + } 116 + } 117 + } 118 + 119 + MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ 120 + { 121 + assert(val != 0); 122 + return 31 - ZSTD_countLeadingZeros32(val); 123 + } 124 + 125 + /* ZSTD_rotateRight_*(): 126 + * Rotates a bitfield to the right by "count" bits. 
127 + * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts 128 + */ 129 + MEM_STATIC 130 + U64 ZSTD_rotateRight_U64(U64 const value, U32 count) { 131 + assert(count < 64); 132 + count &= 0x3F; /* for fickle pattern recognition */ 133 + return (value >> count) | (U64)(value << ((0U - count) & 0x3F)); 134 + } 135 + 136 + MEM_STATIC 137 + U32 ZSTD_rotateRight_U32(U32 const value, U32 count) { 138 + assert(count < 32); 139 + count &= 0x1F; /* for fickle pattern recognition */ 140 + return (value >> count) | (U32)(value << ((0U - count) & 0x1F)); 141 + } 142 + 143 + MEM_STATIC 144 + U16 ZSTD_rotateRight_U16(U16 const value, U32 count) { 145 + assert(count < 16); 146 + count &= 0x0F; /* for fickle pattern recognition */ 147 + return (value >> count) | (U16)(value << ((0U - count) & 0x0F)); 148 + } 149 + 150 + #endif /* ZSTD_BITS_H */
+74 -81
lib/zstd/common/bitstream.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* ****************************************************************** 2 3 * bitstream 3 4 * Part of FSE library 4 - * Copyright (c) Yann Collet, Facebook, Inc. 5 + * Copyright (c) Meta Platforms, Inc. and affiliates. 5 6 * 6 7 * You can contact the author at : 7 8 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy ··· 28 27 #include "compiler.h" /* UNLIKELY() */ 29 28 #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ 30 29 #include "error_private.h" /* error codes and messages */ 31 - 30 + #include "bits.h" /* ZSTD_highbit32 */ 32 31 33 32 /*========================================= 34 33 * Target specific ··· 42 41 /*-****************************************** 43 42 * bitStream encoding API (write forward) 44 43 ********************************************/ 44 + typedef size_t BitContainerType; 45 45 /* bitStream can mix input from multiple sources. 46 46 * A critical property of these streams is that they encode and decode in **reverse** direction. 47 47 * So the first bit sequence you add will be the last to be read, like a LIFO stack. 48 48 */ 49 49 typedef struct { 50 - size_t bitContainer; 50 + BitContainerType bitContainer; 51 51 unsigned bitPos; 52 52 char* startPtr; 53 53 char* ptr; ··· 56 54 } BIT_CStream_t; 57 55 58 56 MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); 59 - MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); 57 + MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits); 60 58 MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); 61 59 MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); 62 60 ··· 65 63 * `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. 66 64 * 67 65 * bits are first added to a local register. 
68 - * Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. 66 + * Local register is BitContainerType, 64-bits on 64-bits systems, or 32-bits on 32-bits systems. 69 67 * Writing data into memory is an explicit operation, performed by the flushBits function. 70 68 * Hence keep track how many bits are potentially stored into local register to avoid register overflow. 71 69 * After a flushBits, a maximum of 7 bits might still be stored into local register. ··· 82 80 * bitStream decoding API (read backward) 83 81 **********************************************/ 84 82 typedef struct { 85 - size_t bitContainer; 83 + BitContainerType bitContainer; 86 84 unsigned bitsConsumed; 87 85 const char* ptr; 88 86 const char* start; 89 87 const char* limitPtr; 90 88 } BIT_DStream_t; 91 89 92 - typedef enum { BIT_DStream_unfinished = 0, 93 - BIT_DStream_endOfBuffer = 1, 94 - BIT_DStream_completed = 2, 95 - BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ 96 - /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ 90 + typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */ 91 + BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */ 92 + BIT_DStream_completed = 2, /* bitstream entirely consumed, bit-exact */ 93 + BIT_DStream_overflow = 3 /* user requested more bits than present in bitstream */ 94 + } BIT_DStream_status; /* result of BIT_reloadDStream() */ 97 95 98 96 MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); 99 - MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); 97 + MEM_STATIC BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); 100 98 MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); 101 99 MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); 102 100 103 101 104 102 /* Start by invoking BIT_initDStream(). 
105 103 * A chunk of the bitStream is then stored into a local register. 106 - * Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). 104 + * Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType). 107 105 * You can then retrieve bitFields stored into the local register, **in reverse order**. 108 106 * Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. 109 107 * A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. ··· 115 113 /*-**************************************** 116 114 * unsafe API 117 115 ******************************************/ 118 - MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); 116 + MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits); 119 117 /* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ 120 118 121 119 MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); ··· 123 121 124 122 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); 125 123 /* faster, but works only if nbBits >= 1 */ 126 - 127 - 128 - 129 - /*-************************************************************** 130 - * Internal functions 131 - ****************************************************************/ 132 - MEM_STATIC unsigned BIT_highbit32 (U32 val) 133 - { 134 - assert(val != 0); 135 - { 136 - # if (__GNUC__ >= 3) /* Use GCC Intrinsic */ 137 - return __builtin_clz (val) ^ 31; 138 - # else /* Software version */ 139 - static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 140 - 11, 14, 16, 18, 22, 25, 3, 30, 141 - 8, 12, 20, 28, 15, 17, 24, 7, 142 - 19, 27, 23, 6, 26, 5, 4, 31 }; 143 - U32 v = val; 144 - v |= v >> 1; 145 - v |= v >> 2; 146 - v |= v >> 4; 147 - v |= v >> 8; 148 - v |= v >> 16; 149 - return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; 150 - 
# endif 151 - } 152 - } 153 124 154 125 /*===== Local Constants =====*/ 155 126 static const unsigned BIT_mask[] = { ··· 153 178 return 0; 154 179 } 155 180 181 + FORCE_INLINE_TEMPLATE BitContainerType BIT_getLowerBits(BitContainerType bitContainer, U32 const nbBits) 182 + { 183 + assert(nbBits < BIT_MASK_SIZE); 184 + return bitContainer & BIT_mask[nbBits]; 185 + } 186 + 156 187 /*! BIT_addBits() : 157 188 * can add up to 31 bits into `bitC`. 158 189 * Note : does not check for register overflow ! */ 159 190 MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, 160 - size_t value, unsigned nbBits) 191 + BitContainerType value, unsigned nbBits) 161 192 { 162 193 DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32); 163 194 assert(nbBits < BIT_MASK_SIZE); 164 195 assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); 165 - bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; 196 + bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos; 166 197 bitC->bitPos += nbBits; 167 198 } 168 199 ··· 176 195 * works only if `value` is _clean_, 177 196 * meaning all high bits above nbBits are 0 */ 178 197 MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, 179 - size_t value, unsigned nbBits) 198 + BitContainerType value, unsigned nbBits) 180 199 { 181 200 assert((value>>nbBits) == 0); 182 201 assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); ··· 223 242 BIT_addBitsFast(bitC, 1, 1); /* endMark */ 224 243 BIT_flushBits(bitC); 225 244 if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ 226 - return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); 245 + return (size_t)(bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); 227 246 } 228 247 229 248 ··· 247 266 bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); 248 267 bitD->bitContainer = MEM_readLEST(bitD->ptr); 249 268 { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; 250 - bitD->bitsConsumed = lastByte ? 
8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ 269 + bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ 251 270 if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } 252 271 } else { 253 272 bitD->ptr = bitD->start; 254 273 bitD->bitContainer = *(const BYTE*)(bitD->start); 255 274 switch(srcSize) 256 275 { 257 - case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); 276 + case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); 258 277 ZSTD_FALLTHROUGH; 259 278 260 - case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); 279 + case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); 261 280 ZSTD_FALLTHROUGH; 262 281 263 - case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); 282 + case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); 264 283 ZSTD_FALLTHROUGH; 265 284 266 - case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; 285 + case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24; 267 286 ZSTD_FALLTHROUGH; 268 287 269 - case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; 288 + case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16; 270 289 ZSTD_FALLTHROUGH; 271 290 272 - case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; 291 + case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8; 273 292 ZSTD_FALLTHROUGH; 274 293 275 294 default: break; 276 295 } 277 296 { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; 278 - bitD->bitsConsumed = 
lastByte ? 8 - BIT_highbit32(lastByte) : 0; 297 + bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; 279 298 if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ 280 299 } 281 300 bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; ··· 284 303 return srcSize; 285 304 } 286 305 287 - MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start) 306 + FORCE_INLINE_TEMPLATE BitContainerType BIT_getUpperBits(BitContainerType bitContainer, U32 const start) 288 307 { 289 308 return bitContainer >> start; 290 309 } 291 310 292 - MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) 311 + FORCE_INLINE_TEMPLATE BitContainerType BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits) 293 312 { 294 313 U32 const regMask = sizeof(bitContainer)*8 - 1; 295 314 /* if start > regMask, bitstream is corrupted, and result is undefined */ ··· 299 318 * such cpus old (pre-Haswell, 2013) and their performance is not of that 300 319 * importance. 301 320 */ 302 - #if defined(__x86_64__) || defined(_M_X86) 321 + #if defined(__x86_64__) || defined(_M_X64) 303 322 return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1); 304 323 #else 305 324 return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; 306 325 #endif 307 - } 308 - 309 - MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) 310 - { 311 - assert(nbBits < BIT_MASK_SIZE); 312 - return bitContainer & BIT_mask[nbBits]; 313 326 } 314 327 315 328 /*! BIT_lookBits() : ··· 312 337 * On 32-bits, maxNbBits==24. 313 338 * On 64-bits, maxNbBits==56. 
314 339 * @return : value extracted */ 315 - MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) 340 + FORCE_INLINE_TEMPLATE BitContainerType BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) 316 341 { 317 342 /* arbitrate between double-shift and shift+mask */ 318 343 #if 1 ··· 328 353 329 354 /*! BIT_lookBitsFast() : 330 355 * unsafe version; only works if nbBits >= 1 */ 331 - MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) 356 + MEM_STATIC BitContainerType BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) 332 357 { 333 358 U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; 334 359 assert(nbBits >= 1); 335 360 return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); 336 361 } 337 362 338 - MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) 363 + FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) 339 364 { 340 365 bitD->bitsConsumed += nbBits; 341 366 } ··· 344 369 * Read (consume) next n bits from local register and update. 345 370 * Pay attention to not read more than nbBits contained into local register. 346 371 * @return : extracted value. */ 347 - MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) 372 + FORCE_INLINE_TEMPLATE BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) 348 373 { 349 - size_t const value = BIT_lookBits(bitD, nbBits); 374 + BitContainerType const value = BIT_lookBits(bitD, nbBits); 350 375 BIT_skipBits(bitD, nbBits); 351 376 return value; 352 377 } 353 378 354 379 /*! 
BIT_readBitsFast() : 355 - * unsafe version; only works only if nbBits >= 1 */ 356 - MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) 380 + * unsafe version; only works if nbBits >= 1 */ 381 + MEM_STATIC BitContainerType BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) 357 382 { 358 - size_t const value = BIT_lookBitsFast(bitD, nbBits); 383 + BitContainerType const value = BIT_lookBitsFast(bitD, nbBits); 359 384 assert(nbBits >= 1); 360 385 BIT_skipBits(bitD, nbBits); 361 386 return value; 387 + } 388 + 389 + /*! BIT_reloadDStream_internal() : 390 + * Simple variant of BIT_reloadDStream(), with two conditions: 391 + * 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8 392 + * 2. look window is valid after shifted down : bitD->ptr >= bitD->start 393 + */ 394 + MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD) 395 + { 396 + assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); 397 + bitD->ptr -= bitD->bitsConsumed >> 3; 398 + assert(bitD->ptr >= bitD->start); 399 + bitD->bitsConsumed &= 7; 400 + bitD->bitContainer = MEM_readLEST(bitD->ptr); 401 + return BIT_DStream_unfinished; 362 402 } 363 403 364 404 /*! BIT_reloadDStreamFast() : ··· 386 396 { 387 397 if (UNLIKELY(bitD->ptr < bitD->limitPtr)) 388 398 return BIT_DStream_overflow; 389 - assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); 390 - bitD->ptr -= bitD->bitsConsumed >> 3; 391 - bitD->bitsConsumed &= 7; 392 - bitD->bitContainer = MEM_readLEST(bitD->ptr); 393 - return BIT_DStream_unfinished; 399 + return BIT_reloadDStream_internal(bitD); 394 400 } 395 401 396 402 /*! BIT_reloadDStream() : 397 403 * Refill `bitD` from buffer previously set in BIT_initDStream() . 398 - * This function is safe, it guarantees it will not read beyond src buffer. 404 + * This function is safe, it guarantees it will not never beyond src buffer. 399 405 * @return : status of `BIT_DStream_t` internal register. 
400 406 * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ 401 - MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) 407 + FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) 402 408 { 403 - if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ 409 + /* note : once in overflow mode, a bitstream remains in this mode until it's reset */ 410 + if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) { 411 + static const BitContainerType zeroFilled = 0; 412 + bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */ 413 + /* overflow detected, erroneous scenario or end of stream: no update */ 404 414 return BIT_DStream_overflow; 415 + } 416 + 417 + assert(bitD->ptr >= bitD->start); 405 418 406 419 if (bitD->ptr >= bitD->limitPtr) { 407 - return BIT_reloadDStreamFast(bitD); 420 + return BIT_reloadDStream_internal(bitD); 408 421 } 409 422 if (bitD->ptr == bitD->start) { 423 + /* reached end of bitStream => no update */ 410 424 if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; 411 425 return BIT_DStream_completed; 412 426 } 413 - /* start < ptr < limitPtr */ 427 + /* start < ptr < limitPtr => cautious update */ 414 428 { U32 nbBytes = bitD->bitsConsumed >> 3; 415 429 BIT_DStream_status result = BIT_DStream_unfinished; 416 430 if (bitD->ptr - nbBytes < bitD->start) { ··· 435 441 { 436 442 return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); 437 443 } 438 - 439 444 440 445 #endif /* BITSTREAM_H_MODULE */
+128 -23
lib/zstd/common/compiler.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 11 10 12 11 #ifndef ZSTD_COMPILER_H 13 12 #define ZSTD_COMPILER_H 13 + 14 + #include <linux/types.h> 14 15 15 16 #include "portability_macros.h" 16 17 ··· 44 41 */ 45 42 #define WIN_CDECL 46 43 44 + /* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ 45 + #define UNUSED_ATTR __attribute__((unused)) 46 + 47 47 /* 48 48 * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant 49 49 * parameters. They must be inlined for the compiler to eliminate the constant 50 50 * branches. 51 51 */ 52 - #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR 52 + #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR UNUSED_ATTR 53 53 /* 54 54 * HINT_INLINE is used to help the compiler generate better code. It is *not* 55 55 * used for "templates", so it can be tweaked based on the compilers ··· 67 61 #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 68 62 # define HINT_INLINE static INLINE_KEYWORD 69 63 #else 70 - # define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR 64 + # define HINT_INLINE FORCE_INLINE_TEMPLATE 71 65 #endif 72 66 73 - /* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ 74 - #define UNUSED_ATTR __attribute__((unused)) 67 + /* "soft" inline : 68 + * The compiler is free to select if it's a good idea to inline or not. 69 + * The main objective is to silence compiler warnings 70 + * when a defined function in included but not used. 71 + * 72 + * Note : this macro is prefixed `MEM_` because it used to be provided by `mem.h` unit. 
73 + * Updating the prefix is probably preferable, but requires a fairly large codemod, 74 + * since this name is used everywhere. 75 + */ 76 + #ifndef MEM_STATIC /* already defined in Linux Kernel mem.h */ 77 + #define MEM_STATIC static __inline UNUSED_ATTR 78 + #endif 75 79 76 80 /* force no inlining */ 77 81 #define FORCE_NOINLINE static __attribute__((__noinline__)) ··· 102 86 # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) 103 87 # define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) 104 88 #elif defined(__aarch64__) 105 - # define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) 106 - # define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) 89 + # define PREFETCH_L1(ptr) do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0) 90 + # define PREFETCH_L2(ptr) do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0) 107 91 #else 108 - # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ 109 - # define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ 92 + # define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */ 93 + # define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */ 110 94 #endif /* NO_PREFETCH */ 111 95 112 96 #define CACHELINE_SIZE 64 113 97 114 - #define PREFETCH_AREA(p, s) { \ 115 - const char* const _ptr = (const char*)(p); \ 116 - size_t const _size = (size_t)(s); \ 117 - size_t _pos; \ 118 - for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ 119 - PREFETCH_L2(_ptr + _pos); \ 120 - } \ 121 - } 98 + #define PREFETCH_AREA(p, s) \ 99 + do { \ 100 + const char* const _ptr = (const char*)(p); \ 101 + size_t const _size = (size_t)(s); \ 102 + size_t _pos; \ 103 + for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ 104 + PREFETCH_L2(_ptr + _pos); \ 105 + } \ 106 + } while (0) 122 107 123 108 /* vectorization 124 109 * older GCC (pre gcc-4.3 picked as the cutoff) uses a different 
syntax, ··· 143 126 #define UNLIKELY(x) (__builtin_expect((x), 0)) 144 127 145 128 #if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) 146 - # define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); } 129 + # define ZSTD_UNREACHABLE do { assert(0), __builtin_unreachable(); } while (0) 147 130 #else 148 - # define ZSTD_UNREACHABLE { assert(0); } 131 + # define ZSTD_UNREACHABLE do { assert(0); } while (0) 149 132 #endif 150 133 151 134 /* disable warnings */ 152 - 153 - /*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/ 154 - 155 135 156 136 /* compile time determination of SIMD support */ 157 137 ··· 172 158 #define ZSTD_FALLTHROUGH fallthrough 173 159 174 160 /*-************************************************************** 175 - * Alignment check 161 + * Alignment 176 162 *****************************************************************/ 163 + 164 + /* @return 1 if @u is a 2^n value, 0 otherwise 165 + * useful to check a value is valid for alignment restrictions */ 166 + MEM_STATIC int ZSTD_isPower2(size_t u) { 167 + return (u & (u-1)) == 0; 168 + } 177 169 178 170 /* this test was initially positioned in mem.h, 179 171 * but this file is removed (or replaced) for linux kernel ··· 195 175 196 176 #endif /* ZSTD_ALIGNOF */ 197 177 178 + #ifndef ZSTD_ALIGNED 179 + /* C90-compatible alignment macro (GCC/Clang). Adjust for other compilers if needed. */ 180 + #define ZSTD_ALIGNED(a) __attribute__((aligned(a))) 181 + #endif /* ZSTD_ALIGNED */ 182 + 183 + 198 184 /*-************************************************************** 199 185 * Sanitizer 200 186 *****************************************************************/ 187 + 188 + /* 189 + * Zstd relies on pointer overflow in its decompressor. 190 + * We add this attribute to functions that rely on pointer overflow. 
191 + */ 192 + #ifndef ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 193 + # if __has_attribute(no_sanitize) 194 + # if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 8 195 + /* gcc < 8 only has signed-integer-overlow which triggers on pointer overflow */ 196 + # define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("signed-integer-overflow"))) 197 + # else 198 + /* older versions of clang [3.7, 5.0) will warn that pointer-overflow is ignored. */ 199 + # define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("pointer-overflow"))) 200 + # endif 201 + # else 202 + # define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 203 + # endif 204 + #endif 205 + 206 + /* 207 + * Helper function to perform a wrapped pointer difference without triggering 208 + * UBSAN. 209 + * 210 + * @returns lhs - rhs with wrapping 211 + */ 212 + MEM_STATIC 213 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 214 + ptrdiff_t ZSTD_wrappedPtrDiff(unsigned char const* lhs, unsigned char const* rhs) 215 + { 216 + return lhs - rhs; 217 + } 218 + 219 + /* 220 + * Helper function to perform a wrapped pointer add without triggering UBSAN. 221 + * 222 + * @return ptr + add with wrapping 223 + */ 224 + MEM_STATIC 225 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 226 + unsigned char const* ZSTD_wrappedPtrAdd(unsigned char const* ptr, ptrdiff_t add) 227 + { 228 + return ptr + add; 229 + } 230 + 231 + /* 232 + * Helper function to perform a wrapped pointer subtraction without triggering 233 + * UBSAN. 234 + * 235 + * @return ptr - sub with wrapping 236 + */ 237 + MEM_STATIC 238 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 239 + unsigned char const* ZSTD_wrappedPtrSub(unsigned char const* ptr, ptrdiff_t sub) 240 + { 241 + return ptr - sub; 242 + } 243 + 244 + /* 245 + * Helper function to add to a pointer that works around C's undefined behavior 246 + * of adding 0 to NULL. 247 + * 248 + * @returns `ptr + add` except it defines `NULL + 0 == NULL`. 
249 + */ 250 + MEM_STATIC 251 + unsigned char* ZSTD_maybeNullPtrAdd(unsigned char* ptr, ptrdiff_t add) 252 + { 253 + return add > 0 ? ptr + add : ptr; 254 + } 255 + 256 + /* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an 257 + * abundance of caution, disable our custom poisoning on mingw. */ 258 + #ifdef __MINGW32__ 259 + #ifndef ZSTD_ASAN_DONT_POISON_WORKSPACE 260 + #define ZSTD_ASAN_DONT_POISON_WORKSPACE 1 261 + #endif 262 + #ifndef ZSTD_MSAN_DONT_POISON_WORKSPACE 263 + #define ZSTD_MSAN_DONT_POISON_WORKSPACE 1 264 + #endif 265 + #endif 201 266 202 267 203 268
+2 -1
lib/zstd/common/cpu.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the
+8 -1
lib/zstd/common/debug.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 1 2 /* ****************************************************************** 2 3 * debug 3 4 * Part of FSE library 4 - * Copyright (c) Yann Collet, Facebook, Inc. 5 + * Copyright (c) Meta Platforms, Inc. and affiliates. 5 6 * 6 7 * You can contact the author at : 7 8 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy ··· 22 21 23 22 #include "debug.h" 24 23 24 + #if (DEBUGLEVEL>=2) 25 + /* We only use this when DEBUGLEVEL>=2, but we get -Werror=pedantic errors if a 26 + * translation unit is empty. So remove this from Linux kernel builds, but 27 + * otherwise just leave it in. 28 + */ 25 29 int g_debuglevel = DEBUGLEVEL; 30 + #endif
+22 -15
lib/zstd/common/debug.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* ****************************************************************** 2 3 * debug 3 4 * Part of FSE library 4 - * Copyright (c) Yann Collet, Facebook, Inc. 5 + * Copyright (c) Meta Platforms, Inc. and affiliates. 5 6 * 6 7 * You can contact the author at : 7 8 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy ··· 32 31 33 32 #ifndef DEBUG_H_12987983217 34 33 #define DEBUG_H_12987983217 35 - 36 34 37 35 38 36 /* static assert is triggered at compile time, leaving no runtime artefact. ··· 82 82 It's useful when enabling very verbose levels 83 83 on selective conditions (such as position in src) */ 84 84 85 - # define RAWLOG(l, ...) { \ 86 - if (l<=g_debuglevel) { \ 87 - ZSTD_DEBUG_PRINT(__VA_ARGS__); \ 88 - } } 89 - # define DEBUGLOG(l, ...) { \ 90 - if (l<=g_debuglevel) { \ 91 - ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \ 92 - ZSTD_DEBUG_PRINT(" \n"); \ 93 - } } 85 + # define RAWLOG(l, ...) \ 86 + do { \ 87 + if (l<=g_debuglevel) { \ 88 + ZSTD_DEBUG_PRINT(__VA_ARGS__); \ 89 + } \ 90 + } while (0) 91 + 92 + #define STRINGIFY(x) #x 93 + #define TOSTRING(x) STRINGIFY(x) 94 + #define LINE_AS_STRING TOSTRING(__LINE__) 95 + 96 + # define DEBUGLOG(l, ...) \ 97 + do { \ 98 + if (l<=g_debuglevel) { \ 99 + ZSTD_DEBUG_PRINT(__FILE__ ":" LINE_AS_STRING ": " __VA_ARGS__); \ 100 + ZSTD_DEBUG_PRINT(" \n"); \ 101 + } \ 102 + } while (0) 94 103 #else 95 - # define RAWLOG(l, ...) {} /* disabled */ 96 - # define DEBUGLOG(l, ...) {} /* disabled */ 104 + # define RAWLOG(l, ...) do { } while (0) /* disabled */ 105 + # define DEBUGLOG(l, ...) do { } while (0) /* disabled */ 97 106 #endif 98 - 99 - 100 107 101 108 #endif /* DEBUG_H_12987983217 */
+13 -29
lib/zstd/common/entropy_common.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 1 2 /* ****************************************************************** 2 3 * Common functions of New Generation Entropy library 3 - * Copyright (c) Yann Collet, Facebook, Inc. 4 + * Copyright (c) Meta Platforms, Inc. and affiliates. 4 5 * 5 6 * You can contact the author at : 6 7 * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy ··· 20 19 #include "error_private.h" /* ERR_*, ERROR */ 21 20 #define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ 22 21 #include "fse.h" 23 - #define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ 24 22 #include "huf.h" 23 + #include "bits.h" /* ZSDT_highbit32, ZSTD_countTrailingZeros32 */ 25 24 26 25 27 26 /*=== Version ===*/ ··· 39 38 /*-************************************************************** 40 39 * FSE NCount encoding-decoding 41 40 ****************************************************************/ 42 - static U32 FSE_ctz(U32 val) 43 - { 44 - assert(val != 0); 45 - { 46 - # if (__GNUC__ >= 3) /* GCC Intrinsic */ 47 - return __builtin_ctz(val); 48 - # else /* Software version */ 49 - U32 count = 0; 50 - while ((val & 1) == 0) { 51 - val >>= 1; 52 - ++count; 53 - } 54 - return count; 55 - # endif 56 - } 57 - } 58 - 59 41 FORCE_INLINE_TEMPLATE 60 42 size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, 61 43 const void* headerBuffer, size_t hbSize) ··· 86 102 * repeat. 87 103 * Avoid UB by setting the high bit to 1. 
88 104 */ 89 - int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; 105 + int repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1; 90 106 while (repeats >= 12) { 91 107 charnum += 3 * 12; 92 108 if (LIKELY(ip <= iend-7)) { ··· 97 113 ip = iend - 4; 98 114 } 99 115 bitStream = MEM_readLE32(ip) >> bitCount; 100 - repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; 116 + repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1; 101 117 } 102 118 charnum += 3 * repeats; 103 119 bitStream >>= 2 * repeats; ··· 162 178 * know that threshold > 1. 163 179 */ 164 180 if (remaining <= 1) break; 165 - nbBits = BIT_highbit32(remaining) + 1; 181 + nbBits = ZSTD_highbit32(remaining) + 1; 166 182 threshold = 1 << (nbBits - 1); 167 183 } 168 184 if (charnum >= maxSV1) break; ··· 237 253 const void* src, size_t srcSize) 238 254 { 239 255 U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; 240 - return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0); 256 + return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* flags */ 0); 241 257 } 242 258 243 259 FORCE_INLINE_TEMPLATE size_t ··· 285 301 if (weightTotal == 0) return ERROR(corruption_detected); 286 302 287 303 /* get last non-null symbol weight (implied, total must be 2^n) */ 288 - { U32 const tableLog = BIT_highbit32(weightTotal) + 1; 304 + { U32 const tableLog = ZSTD_highbit32(weightTotal) + 1; 289 305 if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); 290 306 *tableLogPtr = tableLog; 291 307 /* determine last weight */ 292 308 { U32 const total = 1 << tableLog; 293 309 U32 const rest = total - weightTotal; 294 - U32 const verif = 1 << BIT_highbit32(rest); 295 - U32 const lastWeight = BIT_highbit32(rest) + 1; 310 + U32 const verif = 1 << ZSTD_highbit32(rest); 311 + U32 const lastWeight = ZSTD_highbit32(rest) + 1; 296 312 if (verif != rest) return 
ERROR(corruption_detected); /* last value must be a clean power of 2 */ 297 313 huffWeight[oSize] = (BYTE)lastWeight; 298 314 rankStats[lastWeight]++; ··· 329 345 U32* nbSymbolsPtr, U32* tableLogPtr, 330 346 const void* src, size_t srcSize, 331 347 void* workSpace, size_t wkspSize, 332 - int bmi2) 348 + int flags) 333 349 { 334 350 #if DYNAMIC_BMI2 335 - if (bmi2) { 351 + if (flags & HUF_flags_bmi2) { 336 352 return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); 337 353 } 338 354 #endif 339 - (void)bmi2; 355 + (void)flags; 340 356 return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); 341 357 }
+11 -2
lib/zstd/common/error_private.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 28 27 case PREFIX(version_unsupported): return "Version not supported"; 29 28 case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; 30 29 case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; 31 - case PREFIX(corruption_detected): return "Corrupted block detected"; 30 + case PREFIX(corruption_detected): return "Data corruption detected"; 32 31 case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; 32 + case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification"; 33 33 case PREFIX(parameter_unsupported): return "Unsupported parameter"; 34 + case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters"; 34 35 case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; 35 36 case PREFIX(init_missing): return "Context should be init first"; 36 37 case PREFIX(memory_allocation): return "Allocation error : not enough memory"; ··· 41 38 case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; 42 39 case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; 43 40 case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; 41 + case PREFIX(cannotProduce_uncompressedBlock): return "This mode cannot generate an uncompressed block"; 42 + case PREFIX(stabilityCondition_notRespected): return "pledged buffer stability condition is not respected"; 44 43 case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; 45 44 case PREFIX(dictionary_wrong): return "Dictionary mismatch"; 46 45 case 
PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; 47 46 case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; 48 47 case PREFIX(srcSize_wrong): return "Src size is incorrect"; 49 48 case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer"; 49 + case PREFIX(noForwardProgress_destFull): return "Operation made no progress over multiple calls, due to output buffer being full"; 50 + case PREFIX(noForwardProgress_inputEmpty): return "Operation made no progress over multiple calls, due to input being empty"; 50 51 /* following error codes are not stable and may be removed or changed in a future version */ 51 52 case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; 52 53 case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; 53 54 case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; 54 55 case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; 56 + case PREFIX(sequenceProducer_failed): return "Block-level external sequence producer returned an error code"; 57 + case PREFIX(externalSequences_invalid): return "External sequences are not valid"; 55 58 case PREFIX(maxCode): 56 59 default: return notErrorCode; 57 60 }
+47 -41
lib/zstd/common/error_private.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 14 13 #ifndef ERROR_H_MODULE 15 14 #define ERROR_H_MODULE 16 15 17 - 18 - 19 16 /* **************************************** 20 17 * Dependencies 21 18 ******************************************/ ··· 21 22 #include "compiler.h" 22 23 #include "debug.h" 23 24 #include "zstd_deps.h" /* size_t */ 24 - 25 25 26 26 /* **************************************** 27 27 * Compiler-specific ··· 47 49 ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } 48 50 49 51 /* check and forward error code */ 50 - #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e 51 - #define CHECK_F(f) { CHECK_V_F(_var_err__, f); } 52 + #define CHECK_V_F(e, f) \ 53 + size_t const e = f; \ 54 + do { \ 55 + if (ERR_isError(e)) \ 56 + return e; \ 57 + } while (0) 58 + #define CHECK_F(f) do { CHECK_V_F(_var_err__, f); } while (0) 52 59 53 60 54 61 /*-**************************************** ··· 87 84 * We want to force this function invocation to be syntactically correct, but 88 85 * we don't want to force runtime evaluation of its arguments. 89 86 */ 90 - #define _FORCE_HAS_FORMAT_STRING(...) \ 91 - if (0) { \ 92 - _force_has_format_string(__VA_ARGS__); \ 93 - } 87 + #define _FORCE_HAS_FORMAT_STRING(...) \ 88 + do { \ 89 + if (0) { \ 90 + _force_has_format_string(__VA_ARGS__); \ 91 + } \ 92 + } while (0) 94 93 95 94 #define ERR_QUOTE(str) #str 96 95 ··· 103 98 * In order to do that (particularly, printing the conditional that failed), 104 99 * this can't just wrap RETURN_ERROR(). 105 100 */ 106 - #define RETURN_ERROR_IF(cond, err, ...) 
\ 107 - if (cond) { \ 108 - RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ 109 - __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \ 110 - _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ 111 - RAWLOG(3, ": " __VA_ARGS__); \ 112 - RAWLOG(3, "\n"); \ 113 - return ERROR(err); \ 114 - } 101 + #define RETURN_ERROR_IF(cond, err, ...) \ 102 + do { \ 103 + if (cond) { \ 104 + RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ 105 + __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \ 106 + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ 107 + RAWLOG(3, ": " __VA_ARGS__); \ 108 + RAWLOG(3, "\n"); \ 109 + return ERROR(err); \ 110 + } \ 111 + } while (0) 115 112 116 113 /* 117 114 * Unconditionally return the specified error. 118 115 * 119 116 * In debug modes, prints additional information. 120 117 */ 121 - #define RETURN_ERROR(err, ...) \ 122 - do { \ 123 - RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ 124 - __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \ 125 - _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ 126 - RAWLOG(3, ": " __VA_ARGS__); \ 127 - RAWLOG(3, "\n"); \ 128 - return ERROR(err); \ 129 - } while(0); 118 + #define RETURN_ERROR(err, ...) \ 119 + do { \ 120 + RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ 121 + __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \ 122 + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ 123 + RAWLOG(3, ": " __VA_ARGS__); \ 124 + RAWLOG(3, "\n"); \ 125 + return ERROR(err); \ 126 + } while(0) 130 127 131 128 /* 132 129 * If the provided expression evaluates to an error code, returns that error code. 133 130 * 134 131 * In debug modes, prints additional information. 135 132 */ 136 - #define FORWARD_IF_ERROR(err, ...) 
\ 137 - do { \ 138 - size_t const err_code = (err); \ 139 - if (ERR_isError(err_code)) { \ 140 - RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ 141 - __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \ 142 - _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ 143 - RAWLOG(3, ": " __VA_ARGS__); \ 144 - RAWLOG(3, "\n"); \ 145 - return err_code; \ 146 - } \ 147 - } while(0); 148 - 133 + #define FORWARD_IF_ERROR(err, ...) \ 134 + do { \ 135 + size_t const err_code = (err); \ 136 + if (ERR_isError(err_code)) { \ 137 + RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ 138 + __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \ 139 + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ 140 + RAWLOG(3, ": " __VA_ARGS__); \ 141 + RAWLOG(3, "\n"); \ 142 + return err_code; \ 143 + } \ 144 + } while(0) 149 145 150 146 #endif /* ERROR_H_MODULE */
+9 -94
lib/zstd/common/fse.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* ****************************************************************** 2 3 * FSE : Finite State Entropy codec 3 4 * Public Prototypes declaration 4 - * Copyright (c) Yann Collet, Facebook, Inc. 5 + * Copyright (c) Meta Platforms, Inc. and affiliates. 5 6 * 6 7 * You can contact the author at : 7 8 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy ··· 12 11 * in the COPYING file in the root directory of this source tree). 13 12 * You may select, at your option, one of the above-listed licenses. 14 13 ****************************************************************** */ 15 - 16 - 17 14 #ifndef FSE_H 18 15 #define FSE_H 19 16 ··· 20 21 * Dependencies 21 22 ******************************************/ 22 23 #include "zstd_deps.h" /* size_t, ptrdiff_t */ 23 - 24 24 25 25 /*-***************************************** 26 26 * FSE_PUBLIC_API : control library symbols visibility ··· 48 50 FSE_PUBLIC_API unsigned FSE_versionNumber(void); /*< library version number; to be used when checking dll version */ 49 51 50 52 51 - /*-**************************************** 52 - * FSE simple functions 53 - ******************************************/ 54 - /*! FSE_compress() : 55 - Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. 56 - 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize). 57 - @return : size of compressed data (<= dstCapacity). 58 - Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! 59 - if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. 60 - if FSE_isError(return), compression failed (more details using FSE_getErrorName()) 61 - */ 62 - FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity, 63 - const void* src, size_t srcSize); 64 - 65 - /*! 
FSE_decompress(): 66 - Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', 67 - into already allocated destination buffer 'dst', of size 'dstCapacity'. 68 - @return : size of regenerated data (<= maxDstSize), 69 - or an error code, which can be tested using FSE_isError() . 70 - 71 - ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! 72 - Why ? : making this distinction requires a header. 73 - Header management is intentionally delegated to the user layer, which can better manage special cases. 74 - */ 75 - FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity, 76 - const void* cSrc, size_t cSrcSize); 77 - 78 - 79 53 /*-***************************************** 80 54 * Tool functions 81 55 ******************************************/ ··· 56 86 /* Error Management */ 57 87 FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ 58 88 FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ 59 - 60 - 61 - /*-***************************************** 62 - * FSE advanced functions 63 - ******************************************/ 64 - /*! FSE_compress2() : 65 - Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' 66 - Both parameters can be defined as '0' to mean : use default value 67 - @return : size of compressed data 68 - Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! 69 - if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. 70 - if FSE_isError(return), it's an error code. 71 - */ 72 - FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); 73 89 74 90 75 91 /*-***************************************** ··· 117 161 /*! Constructor and Destructor of FSE_CTable. 
118 162 Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ 119 163 typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ 120 - FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog); 121 - FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); 122 164 123 165 /*! FSE_buildCTable(): 124 166 Builds `ct`, which must be already allocated, using FSE_createCTable(). ··· 192 238 unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, 193 239 const void* rBuffer, size_t rBuffSize, int bmi2); 194 240 195 - /*! Constructor and Destructor of FSE_DTable. 196 - Note that its size depends on 'tableLog' */ 197 241 typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ 198 - FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog); 199 - FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt); 200 - 201 - /*! FSE_buildDTable(): 202 - Builds 'dt', which must be already allocated, using FSE_createDTable(). 203 - return : 0, or an errorCode, which can be tested using FSE_isError() */ 204 - FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); 205 - 206 - /*! FSE_decompress_usingDTable(): 207 - Decompress compressed source `cSrc` of size `cSrcSize` using `dt` 208 - into `dst` which must be already allocated. 209 - @return : size of regenerated data (necessarily <= `dstCapacity`), 210 - or an errorCode, which can be tested using FSE_isError() */ 211 - FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); 212 242 213 243 /*! 
214 244 Tutorial : ··· 224 286 225 287 #endif /* FSE_H */ 226 288 289 + 227 290 #if !defined(FSE_H_FSE_STATIC_LINKING_ONLY) 228 291 #define FSE_H_FSE_STATIC_LINKING_ONLY 229 - 230 - /* *** Dependency *** */ 231 292 #include "bitstream.h" 232 - 233 293 234 294 /* ***************************************** 235 295 * Static allocation ··· 253 317 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); 254 318 /*< same as FSE_optimalTableLog(), which used `minus==2` */ 255 319 256 - /* FSE_compress_wksp() : 257 - * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). 258 - * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable. 259 - */ 260 - #define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) ) 261 - size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); 262 - 263 - size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); 264 - /*< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ 265 - 266 320 size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); 267 321 /*< build a fake FSE_CTable, designed to compress always the same symbolValue */ 268 322 ··· 270 344 FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); 271 345 /*< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */ 272 346 273 - size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits); 274 - /*< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses 
nbBits */ 275 - 276 - size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); 277 - /*< build a fake FSE_DTable, designed to always generate the same symbolValue */ 278 - 279 - #define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1) 347 + #define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1) 280 348 #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned)) 281 - size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize); 282 - /*< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */ 283 - 284 349 size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2); 285 - /*< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */ 350 + /*< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`. 
351 + * Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */ 286 352 287 353 typedef enum { 288 354 FSE_repeat_none, /*< Cannot use the previous table */ ··· 457 539 FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; 458 540 const U16* const stateTable = (const U16*)(statePtr->stateTable); 459 541 U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); 460 - BIT_addBits(bitC, statePtr->value, nbBitsOut); 542 + BIT_addBits(bitC, (BitContainerType)statePtr->value, nbBitsOut); 461 543 statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; 462 544 } 463 545 464 546 MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) 465 547 { 466 - BIT_addBits(bitC, statePtr->value, statePtr->stateLog); 548 + BIT_addBits(bitC, (BitContainerType)statePtr->value, statePtr->stateLog); 467 549 BIT_flushBits(bitC); 468 550 } 469 551 470 552 471 553 /* FSE_getMaxNbBits() : 472 554 * Approximate maximum cost of a symbol, in bits. 473 - * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) 555 + * Fractional get rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) 474 556 * note 1 : assume symbolValue is valid (<= maxSymbolValue) 475 557 * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ 476 558 MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) ··· 623 705 624 706 #define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3) 625 707 626 - 627 708 #endif /* FSE_STATIC_LINKING_ONLY */ 628 - 629 -
+29 -103
lib/zstd/common/fse_decompress.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 1 2 /* ****************************************************************** 2 3 * FSE : Finite State Entropy decoder 3 - * Copyright (c) Yann Collet, Facebook, Inc. 4 + * Copyright (c) Meta Platforms, Inc. and affiliates. 4 5 * 5 6 * You can contact the author at : 6 7 * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy ··· 23 22 #define FSE_STATIC_LINKING_ONLY 24 23 #include "fse.h" 25 24 #include "error_private.h" 26 - #define ZSTD_DEPS_NEED_MALLOC 27 - #include "zstd_deps.h" 25 + #include "zstd_deps.h" /* ZSTD_memcpy */ 26 + #include "bits.h" /* ZSTD_highbit32 */ 28 27 29 28 30 29 /* ************************************************************** ··· 56 55 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) 57 56 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) 58 57 59 - 60 - /* Function templates */ 61 - FSE_DTable* FSE_createDTable (unsigned tableLog) 62 - { 63 - if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; 64 - return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); 65 - } 66 - 67 - void FSE_freeDTable (FSE_DTable* dt) 68 - { 69 - ZSTD_free(dt); 70 - } 71 - 72 58 static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) 73 59 { 74 60 void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ ··· 84 96 symbolNext[s] = 1; 85 97 } else { 86 98 if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0; 87 - symbolNext[s] = normalizedCounter[s]; 99 + symbolNext[s] = (U16)normalizedCounter[s]; 88 100 } } } 89 101 ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); 90 102 } ··· 99 111 * all symbols have counts <= 8. We ensure we have 8 bytes at the end of 100 112 * our buffer to handle the over-write. 
101 113 */ 102 - { 103 - U64 const add = 0x0101010101010101ull; 114 + { U64 const add = 0x0101010101010101ull; 104 115 size_t pos = 0; 105 116 U64 sv = 0; 106 117 U32 s; ··· 110 123 for (i = 8; i < n; i += 8) { 111 124 MEM_write64(spread + pos + i, sv); 112 125 } 113 - pos += n; 114 - } 115 - } 126 + pos += (size_t)n; 127 + } } 116 128 /* Now we spread those positions across the table. 117 - * The benefit of doing it in two stages is that we avoid the the 129 + * The benefit of doing it in two stages is that we avoid the 118 130 * variable size inner loop, which caused lots of branch misses. 119 131 * Now we can run through all the positions without any branch misses. 120 - * We unroll the loop twice, since that is what emperically worked best. 132 + * We unroll the loop twice, since that is what empirically worked best. 121 133 */ 122 134 { 123 135 size_t position = 0; ··· 152 166 for (u=0; u<tableSize; u++) { 153 167 FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol); 154 168 U32 const nextState = symbolNext[symbol]++; 155 - tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) ); 169 + tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) ); 156 170 tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize); 157 171 } } 158 172 ··· 170 184 /*-******************************************************* 171 185 * Decompression (Byte symbols) 172 186 *********************************************************/ 173 - size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue) 174 - { 175 - void* ptr = dt; 176 - FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; 177 - void* dPtr = dt + 1; 178 - FSE_decode_t* const cell = (FSE_decode_t*)dPtr; 179 - 180 - DTableH->tableLog = 0; 181 - DTableH->fastMode = 0; 182 - 183 - cell->newState = 0; 184 - cell->symbol = symbolValue; 185 - cell->nbBits = 0; 186 - 187 - return 0; 188 - } 189 - 190 - 191 - size_t FSE_buildDTable_raw (FSE_DTable* dt, 
unsigned nbBits) 192 - { 193 - void* ptr = dt; 194 - FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; 195 - void* dPtr = dt + 1; 196 - FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr; 197 - const unsigned tableSize = 1 << nbBits; 198 - const unsigned tableMask = tableSize - 1; 199 - const unsigned maxSV1 = tableMask+1; 200 - unsigned s; 201 - 202 - /* Sanity checks */ 203 - if (nbBits < 1) return ERROR(GENERIC); /* min size */ 204 - 205 - /* Build Decoding Table */ 206 - DTableH->tableLog = (U16)nbBits; 207 - DTableH->fastMode = 1; 208 - for (s=0; s<maxSV1; s++) { 209 - dinfo[s].newState = 0; 210 - dinfo[s].symbol = (BYTE)s; 211 - dinfo[s].nbBits = (BYTE)nbBits; 212 - } 213 - 214 - return 0; 215 - } 216 187 217 188 FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic( 218 189 void* dst, size_t maxDstSize, ··· 190 247 191 248 FSE_initDState(&state1, &bitD, dt); 192 249 FSE_initDState(&state2, &bitD, dt); 250 + 251 + RETURN_ERROR_IF(BIT_reloadDStream(&bitD)==BIT_DStream_overflow, corruption_detected, ""); 193 252 194 253 #define FSE_GETSYMBOL(statePtr) fast ? 
FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) 195 254 ··· 232 287 break; 233 288 } } 234 289 235 - return op-ostart; 236 - } 237 - 238 - 239 - size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, 240 - const void* cSrc, size_t cSrcSize, 241 - const FSE_DTable* dt) 242 - { 243 - const void* ptr = dt; 244 - const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; 245 - const U32 fastMode = DTableH->fastMode; 246 - 247 - /* select fast mode (static) */ 248 - if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); 249 - return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); 250 - } 251 - 252 - 253 - size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) 254 - { 255 - return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0); 290 + assert(op >= ostart); 291 + return (size_t)(op-ostart); 256 292 } 257 293 258 294 typedef struct { 259 295 short ncount[FSE_MAX_SYMBOL_VALUE + 1]; 260 - FSE_DTable dtable[]; /* Dynamically sized */ 261 296 } FSE_DecompressWksp; 262 297 263 298 ··· 252 327 unsigned tableLog; 253 328 unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; 254 329 FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace; 330 + size_t const dtablePos = sizeof(FSE_DecompressWksp) / sizeof(FSE_DTable); 331 + FSE_DTable* const dtable = (FSE_DTable*)workSpace + dtablePos; 255 332 256 - DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0); 333 + FSE_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0); 257 334 if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC); 258 335 336 + /* correct offset to dtable depends on this property */ 337 + FSE_STATIC_ASSERT(sizeof(FSE_DecompressWksp) % sizeof(FSE_DTable) == 0); 338 + 259 339 /* normal FSE decoding mode */ 260 - { 261 - size_t const NCountLength = 
FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); 340 + { size_t const NCountLength = 341 + FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); 262 342 if (FSE_isError(NCountLength)) return NCountLength; 263 343 if (tableLog > maxLog) return ERROR(tableLog_tooLarge); 264 344 assert(NCountLength <= cSrcSize); ··· 272 342 } 273 343 274 344 if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge); 275 - workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog); 345 + assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize); 346 + workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); 276 347 wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); 277 348 278 - CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) ); 349 + CHECK_F( FSE_buildDTable_internal(dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) ); 279 350 280 351 { 281 - const void* ptr = wksp->dtable; 352 + const void* ptr = dtable; 282 353 const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; 283 354 const U32 fastMode = DTableH->fastMode; 284 355 285 356 /* select fast mode (static) */ 286 - if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1); 287 - return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0); 357 + if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1); 358 + return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0); 288 359 } 289 360 } 290 361 ··· 312 381 (void)bmi2; 313 382 return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); 314 383 } 315 - 316 - 317 - typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; 318 - 319 - 320 384 321 385 #endif /* 
FSE_COMMONDEFS_ONLY */
+85 -165
lib/zstd/common/huf.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* ****************************************************************** 2 3 * huff0 huffman codec, 3 4 * part of Finite State Entropy library 4 - * Copyright (c) Yann Collet, Facebook, Inc. 5 + * Copyright (c) Meta Platforms, Inc. and affiliates. 5 6 * 6 7 * You can contact the author at : 7 8 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy ··· 13 12 * You may select, at your option, one of the above-listed licenses. 14 13 ****************************************************************** */ 15 14 16 - 17 15 #ifndef HUF_H_298734234 18 16 #define HUF_H_298734234 19 17 20 18 /* *** Dependencies *** */ 21 19 #include "zstd_deps.h" /* size_t */ 22 - 23 - 24 - /* *** library symbols visibility *** */ 25 - /* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual, 26 - * HUF symbols remain "private" (internal symbols for library only). 27 - * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */ 28 - #if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) 29 - # define HUF_PUBLIC_API __attribute__ ((visibility ("default"))) 30 - #elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ 31 - # define HUF_PUBLIC_API __declspec(dllexport) 32 - #elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) 33 - # define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */ 34 - #else 35 - # define HUF_PUBLIC_API 36 - #endif 37 - 38 - 39 - /* ========================== */ 40 - /* *** simple functions *** */ 41 - /* ========================== */ 42 - 43 - /* HUF_compress() : 44 - * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. 45 - * 'dst' buffer must be already allocated. 46 - * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). 
47 - * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. 48 - * @return : size of compressed data (<= `dstCapacity`). 49 - * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! 50 - * if HUF_isError(return), compression failed (more details using HUF_getErrorName()) 51 - */ 52 - HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, 53 - const void* src, size_t srcSize); 54 - 55 - /* HUF_decompress() : 56 - * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', 57 - * into already allocated buffer 'dst', of minimum size 'dstSize'. 58 - * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. 59 - * Note : in contrast with FSE, HUF_decompress can regenerate 60 - * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, 61 - * because it knows size to regenerate (originalSize). 62 - * @return : size of regenerated data (== originalSize), 63 - * or an error code, which can be tested using HUF_isError() 64 - */ 65 - HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, 66 - const void* cSrc, size_t cSrcSize); 67 - 68 - 69 - /* *** Tool functions *** */ 70 - #define HUF_BLOCKSIZE_MAX (128 * 1024) /*< maximum input size for a single block compressed with HUF_compress */ 71 - HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /*< maximum compressed size (worst case) */ 72 - 73 - /* Error Management */ 74 - HUF_PUBLIC_API unsigned HUF_isError(size_t code); /*< tells if a return value is an error code */ 75 - HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /*< provides error code string (useful for debugging) */ 76 - 77 - 78 - /* *** Advanced function *** */ 79 - 80 - /* HUF_compress2() : 81 - * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`. 82 - * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . 83 - * `tableLog` must be `<= HUF_TABLELOG_MAX` . 
*/ 84 - HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, 85 - const void* src, size_t srcSize, 86 - unsigned maxSymbolValue, unsigned tableLog); 87 - 88 - /* HUF_compress4X_wksp() : 89 - * Same as HUF_compress2(), but uses externally allocated `workSpace`. 90 - * `workspace` must be at least as large as HUF_WORKSPACE_SIZE */ 91 - #define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */) 92 - #define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64)) 93 - HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, 94 - const void* src, size_t srcSize, 95 - unsigned maxSymbolValue, unsigned tableLog, 96 - void* workSpace, size_t wkspSize); 97 - 98 - #endif /* HUF_H_298734234 */ 99 - 100 - /* ****************************************************************** 101 - * WARNING !! 102 - * The following section contains advanced and experimental definitions 103 - * which shall never be used in the context of a dynamic library, 104 - * because they are not guaranteed to remain stable in the future. 105 - * Only consider them in association with static linking. 
106 - * *****************************************************************/ 107 - #if !defined(HUF_H_HUF_STATIC_LINKING_ONLY) 108 - #define HUF_H_HUF_STATIC_LINKING_ONLY 109 - 110 - /* *** Dependencies *** */ 111 - #include "mem.h" /* U32 */ 20 + #include "mem.h" /* U32 */ 112 21 #define FSE_STATIC_LINKING_ONLY 113 22 #include "fse.h" 114 23 24 + /* *** Tool functions *** */ 25 + #define HUF_BLOCKSIZE_MAX (128 * 1024) /*< maximum input size for a single block compressed with HUF_compress */ 26 + size_t HUF_compressBound(size_t size); /*< maximum compressed size (worst case) */ 27 + 28 + /* Error Management */ 29 + unsigned HUF_isError(size_t code); /*< tells if a return value is an error code */ 30 + const char* HUF_getErrorName(size_t code); /*< provides error code string (useful for debugging) */ 31 + 32 + 33 + #define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */) 34 + #define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64)) 115 35 116 36 /* *** Constants *** */ 117 37 #define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */ ··· 73 151 /* **************************************** 74 152 * Advanced decompression functions 75 153 ******************************************/ 76 - size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */ 77 - #ifndef HUF_FORCE_DECOMPRESS_X1 78 - size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */ 79 - #endif 80 154 81 - size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< decodes RLE and uncompressed */ 82 - size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< considers RLE and uncompressed as errors */ 83 - size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t 
dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< considers RLE and uncompressed as errors */ 84 - size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */ 85 - size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< single-symbol decoder */ 86 - #ifndef HUF_FORCE_DECOMPRESS_X1 87 - size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */ 88 - size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< double-symbols decoder */ 89 - #endif 155 + /* 156 + * Huffman flags bitset. 157 + * For all flags, 0 is the default value. 158 + */ 159 + typedef enum { 160 + /* 161 + * If compiled with DYNAMIC_BMI2: Set flag only if the CPU supports BMI2 at runtime. 162 + * Otherwise: Ignored. 163 + */ 164 + HUF_flags_bmi2 = (1 << 0), 165 + /* 166 + * If set: Test possible table depths to find the one that produces the smallest header + encoded size. 167 + * If unset: Use heuristic to find the table depth. 168 + */ 169 + HUF_flags_optimalDepth = (1 << 1), 170 + /* 171 + * If set: If the previous table can encode the input, always reuse the previous table. 172 + * If unset: If the previous table can encode the input, reuse the previous table if it results in a smaller output. 173 + */ 174 + HUF_flags_preferRepeat = (1 << 2), 175 + /* 176 + * If set: Sample the input and check if the sample is uncompressible, if it is then don't attempt to compress. 177 + * If unset: Always histogram the entire input. 
178 + */ 179 + HUF_flags_suspectUncompressible = (1 << 3), 180 + /* 181 + * If set: Don't use assembly implementations 182 + * If unset: Allow using assembly implementations 183 + */ 184 + HUF_flags_disableAsm = (1 << 4), 185 + /* 186 + * If set: Don't use the fast decoding loop, always use the fallback decoding loop. 187 + * If unset: Use the fast decoding loop when possible. 188 + */ 189 + HUF_flags_disableFast = (1 << 5) 190 + } HUF_flags_e; 90 191 91 192 92 193 /* **************************************** 93 194 * HUF detailed API 94 195 * ****************************************/ 196 + #define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra 95 197 96 198 /*! HUF_compress() does the following: 97 199 * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") ··· 128 182 * For example, it's possible to compress several blocks using the same 'CTable', 129 183 * or to save and regenerate 'CTable' using external methods. 130 184 */ 131 - unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); 132 - size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. 
In which case, CTable will overwrite count content */ 133 - size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); 185 + unsigned HUF_minTableLog(unsigned symbolCardinality); 186 + unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue); 187 + unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace, 188 + size_t wkspSize, HUF_CElt* table, const unsigned* count, int flags); /* table is used as scratch space for building and testing tables, not a return value */ 134 189 size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize); 135 - size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); 136 - size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2); 190 + size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags); 137 191 size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); 138 192 int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); 139 193 ··· 142 196 HUF_repeat_check, /*< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ 143 197 HUF_repeat_valid /*< Can use the previous table and it is assumed to be valid */ 144 198 } HUF_repeat; 199 + 145 200 /* HUF_compress4X_repeat() : 146 201 * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. 147 202 * If it uses hufTable it does not modify hufTable or repeat. 
··· 153 206 const void* src, size_t srcSize, 154 207 unsigned maxSymbolValue, unsigned tableLog, 155 208 void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ 156 - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible); 209 + HUF_CElt* hufTable, HUF_repeat* repeat, int flags); 157 210 158 211 /* HUF_buildCTable_wksp() : 159 212 * Same as HUF_buildCTable(), but using externally allocated scratch buffer. 160 213 * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. 161 214 */ 162 - #define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) 215 + #define HUF_CTABLE_WORKSPACE_SIZE_U32 ((4 * (HUF_SYMBOLVALUE_MAX + 1)) + 192) 163 216 #define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) 164 217 size_t HUF_buildCTable_wksp (HUF_CElt* tree, 165 218 const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, ··· 185 238 U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, 186 239 const void* src, size_t srcSize, 187 240 void* workspace, size_t wkspSize, 188 - int bmi2); 241 + int flags); 189 242 190 243 /* HUF_readCTable() : 191 244 * Loading a CTable saved with HUF_writeCTable() */ ··· 193 246 194 247 /* HUF_getNbBitsFromCTable() : 195 248 * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX 196 - * Note 1 : is not inlined, as HUF_CElt definition is private */ 249 + * Note 1 : If symbolValue > HUF_readCTableHeader(symbolTable).maxSymbolValue, returns 0 250 + * Note 2 : is not inlined, as HUF_CElt definition is private 251 + */ 197 252 U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue); 253 + 254 + typedef struct { 255 + BYTE tableLog; 256 + BYTE maxSymbolValue; 257 + BYTE unused[sizeof(size_t) - 2]; 258 + } HUF_CTableHeader; 259 + 260 + /* HUF_readCTableHeader() : 261 + * @returns The header from 
the CTable specifying the tableLog and the maxSymbolValue. 262 + */ 263 + HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable); 198 264 199 265 /* 200 266 * HUF_decompress() does the following: ··· 236 276 #define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9)) 237 277 #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) 238 278 239 - #ifndef HUF_FORCE_DECOMPRESS_X2 240 - size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); 241 - size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); 242 - #endif 243 - #ifndef HUF_FORCE_DECOMPRESS_X1 244 - size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); 245 - size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); 246 - #endif 247 - 248 - size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); 249 - #ifndef HUF_FORCE_DECOMPRESS_X2 250 - size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); 251 - #endif 252 - #ifndef HUF_FORCE_DECOMPRESS_X1 253 - size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); 254 - #endif 255 - 256 279 257 280 /* ====================== */ 258 281 /* single stream variants */ 259 282 /* ====================== */ 260 283 261 - size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); 262 - size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */ 263 - size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const 
void* src, size_t srcSize, const HUF_CElt* CTable); 264 - size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2); 284 + size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags); 265 285 /* HUF_compress1X_repeat() : 266 286 * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. 267 287 * If it uses hufTable it does not modify hufTable or repeat. ··· 252 312 const void* src, size_t srcSize, 253 313 unsigned maxSymbolValue, unsigned tableLog, 254 314 void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ 255 - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible); 315 + HUF_CElt* hufTable, HUF_repeat* repeat, int flags); 256 316 257 - size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ 317 + size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); 258 318 #ifndef HUF_FORCE_DECOMPRESS_X1 259 - size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ 260 - #endif 261 - 262 - size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); 263 - size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); 264 - #ifndef HUF_FORCE_DECOMPRESS_X2 265 - size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */ 266 - size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, 
void* workSpace, size_t wkspSize); /*< single-symbol decoder */ 267 - #endif 268 - #ifndef HUF_FORCE_DECOMPRESS_X1 269 - size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */ 270 - size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< double-symbols decoder */ 271 - #endif 272 - 273 - size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /*< automatic selection of sing or double symbol decoder, based on DTable */ 274 - #ifndef HUF_FORCE_DECOMPRESS_X2 275 - size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); 276 - #endif 277 - #ifndef HUF_FORCE_DECOMPRESS_X1 278 - size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); 319 + size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); /*< double-symbols decoder */ 279 320 #endif 280 321 281 322 /* BMI2 variants. 282 323 * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. 
283 324 */ 284 - size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); 325 + size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags); 285 326 #ifndef HUF_FORCE_DECOMPRESS_X2 286 - size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); 327 + size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); 287 328 #endif 288 - size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); 289 - size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); 329 + size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags); 330 + size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); 290 331 #ifndef HUF_FORCE_DECOMPRESS_X2 291 - size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); 332 + size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags); 292 333 #endif 293 334 #ifndef HUF_FORCE_DECOMPRESS_X1 294 - size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); 335 + size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags); 295 336 #endif 296 337 297 
- #endif /* HUF_STATIC_LINKING_ONLY */ 298 - 338 + #endif /* HUF_H_298734234 */
+2 -1
lib/zstd/common/mem.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 2 2 /* 3 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 4 4 * All rights reserved. 5 5 * 6 6 * This source code is licensed under both the BSD-style license (found in the ··· 24 24 /*-**************************************** 25 25 * Compiler specifics 26 26 ******************************************/ 27 + #undef MEM_STATIC /* may be already defined from common/compiler.h */ 27 28 #define MEM_STATIC static inline 28 29 29 30 /*-**************************************************************
+35 -10
lib/zstd/common/portability_macros.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 13 12 #define ZSTD_PORTABILITY_MACROS_H 14 13 15 14 /* 16 - * This header file contains macro defintions to support portability. 15 + * This header file contains macro definitions to support portability. 17 16 * This header is shared between C and ASM code, so it MUST only 18 17 * contain macro definitions. It MUST not contain any C code. 19 18 * ··· 46 45 /* Mark the internal assembly functions as hidden */ 47 46 #ifdef __ELF__ 48 47 # define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func 48 + #elif defined(__APPLE__) 49 + # define ZSTD_HIDE_ASM_FUNCTION(func) .private_extern func 49 50 #else 50 51 # define ZSTD_HIDE_ASM_FUNCTION(func) 51 52 #endif 53 + 54 + /* Compile time determination of BMI2 support */ 55 + 52 56 53 57 /* Enable runtime BMI2 dispatch based on the CPU. 54 58 * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. 55 59 */ 56 60 #ifndef DYNAMIC_BMI2 57 - #if ((defined(__clang__) && __has_attribute(__target__)) \ 61 + # if ((defined(__clang__) && __has_attribute(__target__)) \ 58 62 || (defined(__GNUC__) \ 59 63 && (__GNUC__ >= 11))) \ 60 - && (defined(__x86_64__) || defined(_M_X64)) \ 64 + && (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)) \ 61 65 && !defined(__BMI2__) 62 - # define DYNAMIC_BMI2 1 63 - #else 64 - # define DYNAMIC_BMI2 0 65 - #endif 66 + # define DYNAMIC_BMI2 1 67 + # else 68 + # define DYNAMIC_BMI2 0 69 + # endif 66 70 #endif 67 71 68 72 /* 69 - * Only enable assembly for GNUC comptabile compilers, 73 + * Only enable assembly for GNU C compatible compilers, 70 74 * because other platforms may not support GAS assembly syntax. 
71 75 * 72 - * Only enable assembly for Linux / MacOS, other platforms may 76 + * Only enable assembly for Linux / MacOS / Win32, other platforms may 73 77 * work, but they haven't been tested. This could likely be 74 78 * extended to BSD systems. 75 79 * ··· 95 89 * - BMI2 is supported at compile time 96 90 */ 97 91 #define ZSTD_ENABLE_ASM_X86_64_BMI2 0 92 + 93 + /* 94 + * For x86 ELF targets, add .note.gnu.property section for Intel CET in 95 + * assembly sources when CET is enabled. 96 + * 97 + * Additionally, any function that may be called indirectly must begin 98 + * with ZSTD_CET_ENDBRANCH. 99 + */ 100 + #if defined(__ELF__) && (defined(__x86_64__) || defined(__i386__)) \ 101 + && defined(__has_include) 102 + # if __has_include(<cet.h>) 103 + # include <cet.h> 104 + # define ZSTD_CET_ENDBRANCH _CET_ENDBR 105 + # endif 106 + #endif 107 + 108 + #ifndef ZSTD_CET_ENDBRANCH 109 + # define ZSTD_CET_ENDBRANCH 110 + #endif 98 111 99 112 #endif /* ZSTD_PORTABILITY_MACROS_H */
+2 -36
lib/zstd/common/zstd_common.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 15 14 * Dependencies 16 15 ***************************************/ 17 16 #define ZSTD_DEPS_NEED_MALLOC 18 - #include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */ 19 17 #include "error_private.h" 20 18 #include "zstd_internal.h" 21 19 ··· 47 47 /*! ZSTD_getErrorString() : 48 48 * provides error code string from enum */ 49 49 const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } 50 - 51 - 52 - 53 - /*=************************************************************** 54 - * Custom allocator 55 - ****************************************************************/ 56 - void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) 57 - { 58 - if (customMem.customAlloc) 59 - return customMem.customAlloc(customMem.opaque, size); 60 - return ZSTD_malloc(size); 61 - } 62 - 63 - void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) 64 - { 65 - if (customMem.customAlloc) { 66 - /* calloc implemented as malloc+memset; 67 - * not as efficient as calloc, but next best guess for custom malloc */ 68 - void* const ptr = customMem.customAlloc(customMem.opaque, size); 69 - ZSTD_memset(ptr, 0, size); 70 - return ptr; 71 - } 72 - return ZSTD_calloc(1, size); 73 - } 74 - 75 - void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) 76 - { 77 - if (ptr!=NULL) { 78 - if (customMem.customFree) 79 - customMem.customFree(customMem.opaque, ptr); 80 - else 81 - ZSTD_free(ptr); 82 - } 83 - }
+15 -1
lib/zstd/common/zstd_deps.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 2 2 /* 3 - * Copyright (c) Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 4 4 * All rights reserved. 5 5 * 6 6 * This source code is licensed under both the BSD-style license (found in the ··· 105 105 106 106 #endif /* ZSTD_DEPS_IO */ 107 107 #endif /* ZSTD_DEPS_NEED_IO */ 108 + 109 + /* 110 + * Only requested when MSAN is enabled. 111 + * Need: 112 + * intptr_t 113 + */ 114 + #ifdef ZSTD_DEPS_NEED_STDINT 115 + #ifndef ZSTD_DEPS_STDINT 116 + #define ZSTD_DEPS_STDINT 117 + 118 + /* intptr_t already provided by ZSTD_DEPS_COMMON */ 119 + 120 + #endif /* ZSTD_DEPS_STDINT */ 121 + #endif /* ZSTD_DEPS_NEED_STDINT */
+14 -139
lib/zstd/common/zstd_internal.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 29 28 #include <linux/zstd.h> 30 29 #define FSE_STATIC_LINKING_ONLY 31 30 #include "fse.h" 32 - #define HUF_STATIC_LINKING_ONLY 33 31 #include "huf.h" 34 32 #include <linux/xxhash.h> /* XXH_reset, update, digest */ 35 33 #define ZSTD_TRACE 0 36 - 37 34 38 35 /* ---- static assert (debug) --- */ 39 36 #define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) ··· 82 83 #define ZSTD_FRAMECHECKSUMSIZE 4 83 84 84 85 #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ 85 - #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ 86 + #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */ 87 + #define MIN_LITERALS_FOR_4_STREAMS 6 86 88 87 - #define HufLog 12 88 - typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; 89 + typedef enum { set_basic, set_rle, set_compressed, set_repeat } SymbolEncodingType_e; 89 90 90 91 #define LONGNBSEQ 0x7F00 91 92 92 93 #define MINMATCH 3 93 94 94 95 #define Litbits 8 96 + #define LitHufLog 11 95 97 #define MaxLit ((1<<Litbits) - 1) 96 98 #define MaxML 52 97 99 #define MaxLL 35 ··· 103 103 #define LLFSELog 9 104 104 #define OffFSELog 8 105 105 #define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog) 106 + #define MaxMLBits 16 107 + #define MaxLLBits 16 106 108 107 109 #define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */ 108 110 /* Each table cannot take more than #symbols * FSELog bits */ ··· 168 166 ZSTD_memcpy(dst, src, 8); 169 167 #endif 170 168 } 171 - #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } 169 + #define COPY8(d,s) do { ZSTD_copy8(d,s); d+=8; s+=8; } while (0) 172 170 173 171 /* Need 
to use memmove here since the literal buffer can now be located within 174 172 the dst buffer. In circumstances where the op "catches up" to where the ··· 188 186 ZSTD_memcpy(dst, copy16_buf, 16); 189 187 #endif 190 188 } 191 - #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } 189 + #define COPY16(d,s) do { ZSTD_copy16(d,s); d+=16; s+=16; } while (0) 192 190 193 191 #define WILDCOPY_OVERLENGTH 32 194 192 #define WILDCOPY_VECLEN 16 ··· 217 215 if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { 218 216 /* Handle short offset copies. */ 219 217 do { 220 - COPY8(op, ip) 218 + COPY8(op, ip); 221 219 } while (op < oend); 222 220 } else { 223 221 assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); ··· 227 225 * one COPY16() in the first call. Then, do two calls per loop since 228 226 * at that point it is more likely to have a high trip count. 229 227 */ 230 - #ifdef __aarch64__ 231 - do { 232 - COPY16(op, ip); 233 - } 234 - while (op < oend); 235 - #else 236 228 ZSTD_copy16(op, ip); 237 229 if (16 >= length) return; 238 230 op += 16; ··· 236 240 COPY16(op, ip); 237 241 } 238 242 while (op < oend); 239 - #endif 240 243 } 241 244 } 242 245 ··· 268 273 /*-******************************************* 269 274 * Private declarations 270 275 *********************************************/ 271 - typedef struct seqDef_s { 272 - U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */ 273 - U16 litLength; 274 - U16 mlBase; /* mlBase == matchLength - MINMATCH */ 275 - } seqDef; 276 - 277 - /* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. 
*/ 278 - typedef enum { 279 - ZSTD_llt_none = 0, /* no longLengthType */ 280 - ZSTD_llt_literalLength = 1, /* represents a long literal */ 281 - ZSTD_llt_matchLength = 2 /* represents a long match */ 282 - } ZSTD_longLengthType_e; 283 - 284 - typedef struct { 285 - seqDef* sequencesStart; 286 - seqDef* sequences; /* ptr to end of sequences */ 287 - BYTE* litStart; 288 - BYTE* lit; /* ptr to end of literals */ 289 - BYTE* llCode; 290 - BYTE* mlCode; 291 - BYTE* ofCode; 292 - size_t maxNbSeq; 293 - size_t maxNbLit; 294 - 295 - /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength 296 - * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment 297 - * the existing value of the litLength or matchLength by 0x10000. 298 - */ 299 - ZSTD_longLengthType_e longLengthType; 300 - U32 longLengthPos; /* Index of the sequence to apply long length modification to */ 301 - } seqStore_t; 302 - 303 - typedef struct { 304 - U32 litLength; 305 - U32 matchLength; 306 - } ZSTD_sequenceLength; 307 - 308 - /* 309 - * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences 310 - * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength. 311 - */ 312 - MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) 313 - { 314 - ZSTD_sequenceLength seqLen; 315 - seqLen.litLength = seq->litLength; 316 - seqLen.matchLength = seq->mlBase + MINMATCH; 317 - if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { 318 - if (seqStore->longLengthType == ZSTD_llt_literalLength) { 319 - seqLen.litLength += 0xFFFF; 320 - } 321 - if (seqStore->longLengthType == ZSTD_llt_matchLength) { 322 - seqLen.matchLength += 0xFFFF; 323 - } 324 - } 325 - return seqLen; 326 - } 327 276 328 277 /* 329 278 * Contains the compressed frame size and an upper-bound for the decompressed frame size. 
··· 276 337 * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` 277 338 */ 278 339 typedef struct { 340 + size_t nbBlocks; 279 341 size_t compressedSize; 280 342 unsigned long long decompressedBound; 281 343 } ZSTD_frameSizeInfo; /* decompress & legacy */ 282 - 283 - const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ 284 - void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ 285 - 286 - /* custom memory allocation functions */ 287 - void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem); 288 - void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem); 289 - void ZSTD_customFree(void* ptr, ZSTD_customMem customMem); 290 - 291 - 292 - MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ 293 - { 294 - assert(val != 0); 295 - { 296 - # if (__GNUC__ >= 3) /* GCC Intrinsic */ 297 - return __builtin_clz (val) ^ 31; 298 - # else /* Software version */ 299 - static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; 300 - U32 v = val; 301 - v |= v >> 1; 302 - v |= v >> 2; 303 - v |= v >> 4; 304 - v |= v >> 8; 305 - v |= v >> 16; 306 - return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; 307 - # endif 308 - } 309 - } 310 - 311 - /* 312 - * Counts the number of trailing zeros of a `size_t`. 313 - * Most compilers should support CTZ as a builtin. A backup 314 - * implementation is provided if the builtin isn't supported, but 315 - * it may not be terribly efficient. 
316 - */ 317 - MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val) 318 - { 319 - if (MEM_64bits()) { 320 - # if (__GNUC__ >= 4) 321 - return __builtin_ctzll((U64)val); 322 - # else 323 - static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19, 324 - 4, 25, 14, 28, 9, 34, 20, 56, 325 - 5, 17, 26, 54, 15, 41, 29, 43, 326 - 10, 31, 38, 35, 21, 45, 49, 57, 327 - 63, 6, 12, 18, 24, 27, 33, 55, 328 - 16, 53, 40, 42, 30, 37, 44, 48, 329 - 62, 11, 23, 32, 52, 39, 36, 47, 330 - 61, 22, 51, 46, 60, 50, 59, 58 }; 331 - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; 332 - # endif 333 - } else { /* 32 bits */ 334 - # if (__GNUC__ >= 3) 335 - return __builtin_ctz((U32)val); 336 - # else 337 - static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3, 338 - 30, 22, 20, 15, 25, 17, 4, 8, 339 - 31, 27, 13, 23, 21, 19, 16, 7, 340 - 26, 12, 18, 6, 11, 5, 10, 9 }; 341 - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; 342 - # endif 343 - } 344 - } 345 - 346 344 347 345 /* ZSTD_invalidateRepCodes() : 348 346 * ensures next compression will not use repcodes from previous block. ··· 296 420 297 421 /*! ZSTD_getcBlockSize() : 298 422 * Provides the size of compressed block from block header `src` */ 299 - /* Used by: decompress, fullbench (does not get its definition from here) */ 423 + /* Used by: decompress, fullbench */ 300 424 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, 301 425 blockProperties_t* bpPtr); 302 426 303 427 /*! 
ZSTD_decodeSeqHeaders() : 304 428 * decode sequence header from src */ 305 - /* Used by: decompress, fullbench (does not get its definition from here) */ 429 + /* Used by: zstd_decompress_block, fullbench */ 306 430 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, 307 431 const void* src, size_t srcSize); 308 432 ··· 314 438 ZSTD_cpuid_t cpuid = ZSTD_cpuid(); 315 439 return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid); 316 440 } 317 - 318 441 319 442 #endif /* ZSTD_CCOMMON_H_MODULE */
+2 -1
lib/zstd/compress/clevels.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the
+16 -58
lib/zstd/compress/fse_compress.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 1 2 /* ****************************************************************** 2 3 * FSE : Finite State Entropy encoder 3 - * Copyright (c) Yann Collet, Facebook, Inc. 4 + * Copyright (c) Meta Platforms, Inc. and affiliates. 4 5 * 5 6 * You can contact the author at : 6 7 * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy ··· 26 25 #include "../common/error_private.h" 27 26 #define ZSTD_DEPS_NEED_MALLOC 28 27 #define ZSTD_DEPS_NEED_MATH64 29 - #include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */ 28 + #include "../common/zstd_deps.h" /* ZSTD_memset */ 29 + #include "../common/bits.h" /* ZSTD_highbit32 */ 30 30 31 31 32 32 /* ************************************************************** ··· 92 90 assert(tableLog < 16); /* required for threshold strategy to work */ 93 91 94 92 /* For explanations on how to distribute symbol values over the table : 95 - * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ 93 + * https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ 96 94 97 95 #ifdef __clang_analyzer__ 98 96 ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ ··· 193 191 break; 194 192 default : 195 193 assert(normalizedCounter[s] > 1); 196 - { U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1); 194 + { U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1); 197 195 U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut; 198 196 symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; 199 197 symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]); ··· 226 224 size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog 227 225 + 4 /* bitCount initialized at 4 */ 228 226 + 2 /* first two symbols may use one additional bit each */) / 8) 229 - + 
1 /* round up to whole nb bytes */ 230 - + 2 /* additional two bytes for bitstream flush */; 227 + + 1 /* round up to whole nb bytes */ 228 + + 2 /* additional two bytes for bitstream flush */; 231 229 return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ 232 230 } 233 231 ··· 256 254 /* Init */ 257 255 remaining = tableSize+1; /* +1 for extra accuracy */ 258 256 threshold = tableSize; 259 - nbBits = tableLog+1; 257 + nbBits = (int)tableLog+1; 260 258 261 259 while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ 262 260 if (previousIs0) { ··· 275 273 } 276 274 while (symbol >= start+3) { 277 275 start+=3; 278 - bitStream += 3 << bitCount; 276 + bitStream += 3U << bitCount; 279 277 bitCount += 2; 280 278 } 281 279 bitStream += (symbol-start) << bitCount; ··· 295 293 count++; /* +1 for extra accuracy */ 296 294 if (count>=threshold) 297 295 count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ 298 - bitStream += count << bitCount; 296 + bitStream += (U32)count << bitCount; 299 297 bitCount += nbBits; 300 298 bitCount -= (count<max); 301 299 previousIs0 = (count==1); ··· 323 321 out[1] = (BYTE)(bitStream>>8); 324 322 out+= (bitCount+7) /8; 325 323 326 - return (out-ostart); 324 + assert(out >= ostart); 325 + return (size_t)(out-ostart); 327 326 } 328 327 329 328 ··· 345 342 * FSE Compression Code 346 343 ****************************************************************/ 347 344 348 - FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) 349 - { 350 - size_t size; 351 - if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; 352 - size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); 353 - return (FSE_CTable*)ZSTD_malloc(size); 354 - } 355 - 356 - void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); } 357 - 358 345 /* provides the minimum logSize to safely represent a distribution */ 359 346 static unsigned FSE_minTableLog(size_t 
srcSize, unsigned maxSymbolValue) 360 347 { 361 - U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1; 362 - U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; 348 + U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1; 349 + U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2; 363 350 U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; 364 351 assert(srcSize > 1); /* Not supported, RLE should be used instead */ 365 352 return minBits; ··· 357 364 358 365 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) 359 366 { 360 - U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; 367 + U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus; 361 368 U32 tableLog = maxTableLog; 362 369 U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); 363 370 assert(srcSize > 1); /* Not supported, RLE should be used instead */ ··· 525 532 return tableLog; 526 533 } 527 534 528 - 529 - /* fake FSE_CTable, for raw (uncompressed) input */ 530 - size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits) 531 - { 532 - const unsigned tableSize = 1 << nbBits; 533 - const unsigned tableMask = tableSize - 1; 534 - const unsigned maxSymbolValue = tableMask; 535 - void* const ptr = ct; 536 - U16* const tableU16 = ( (U16*) ptr) + 2; 537 - void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */ 538 - FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); 539 - unsigned s; 540 - 541 - /* Sanity checks */ 542 - if (nbBits < 1) return ERROR(GENERIC); /* min size */ 543 - 544 - /* header */ 545 - tableU16[-2] = (U16) nbBits; 546 - tableU16[-1] = (U16) maxSymbolValue; 547 - 548 - /* Build table */ 549 - for (s=0; s<tableSize; s++) 550 - tableU16[s] = (U16)(tableSize + s); 551 - 552 - /* Build Symbol Transformation Table */ 553 - { const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits); 554 - for (s=0; 
s<=maxSymbolValue; s++) { 555 - symbolTT[s].deltaNbBits = deltaNbBits; 556 - symbolTT[s].deltaFindState = s-1; 557 - } } 558 - 559 - return 0; 560 - } 561 - 562 535 /* fake FSE_CTable, for rle input (always same symbol) */ 563 536 size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue) 564 537 { ··· 622 663 623 664 624 665 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } 625 - 626 666 627 667 #endif /* FSE_COMMONDEFS_ONLY */
+12 -1
lib/zstd/compress/hist.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 1 2 /* ****************************************************************** 2 3 * hist : Histogram functions 3 4 * part of Finite State Entropy project 4 - * Copyright (c) Yann Collet, Facebook, Inc. 5 + * Copyright (c) Meta Platforms, Inc. and affiliates. 5 6 * 6 7 * You can contact the author at : 7 8 * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy ··· 27 26 /*-************************************************************** 28 27 * Histogram functions 29 28 ****************************************************************/ 29 + void HIST_add(unsigned* count, const void* src, size_t srcSize) 30 + { 31 + const BYTE* ip = (const BYTE*)src; 32 + const BYTE* const end = ip + srcSize; 33 + 34 + while (ip<end) { 35 + count[*ip++]++; 36 + } 37 + } 38 + 30 39 unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, 31 40 const void* src, size_t srcSize) 32 41 {
+9 -1
lib/zstd/compress/hist.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* ****************************************************************** 2 3 * hist : Histogram functions 3 4 * part of Finite State Entropy project 4 - * Copyright (c) Yann Collet, Facebook, Inc. 5 + * Copyright (c) Meta Platforms, Inc. and affiliates. 5 6 * 6 7 * You can contact the author at : 7 8 * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy ··· 74 73 */ 75 74 unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, 76 75 const void* src, size_t srcSize); 76 + 77 + /*! HIST_add() : 78 + * Lowest level: just add the number of occurrences of characters from @src into @count. 79 + * @count is not reset. @count array is presumed large enough (i.e. 1 KB). 80 + * This function does not need any additional stack memory. 81 + */ 82 + void HIST_add(unsigned* count, const void* src, size_t srcSize);
+284 -157
lib/zstd/compress/huf_compress.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 1 2 /* ****************************************************************** 2 3 * Huffman encoder, part of New Generation Entropy library 3 - * Copyright (c) Yann Collet, Facebook, Inc. 4 + * Copyright (c) Meta Platforms, Inc. and affiliates. 4 5 * 5 6 * You can contact the author at : 6 7 * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy ··· 27 26 #include "hist.h" 28 27 #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ 29 28 #include "../common/fse.h" /* header compression */ 30 - #define HUF_STATIC_LINKING_ONLY 31 29 #include "../common/huf.h" 32 30 #include "../common/error_private.h" 31 + #include "../common/bits.h" /* ZSTD_highbit32 */ 33 32 34 33 35 34 /* ************************************************************** ··· 40 39 41 40 42 41 /* ************************************************************** 43 - * Utils 42 + * Required declarations 44 43 ****************************************************************/ 45 - unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) 44 + typedef struct nodeElt_s { 45 + U32 count; 46 + U16 parent; 47 + BYTE byte; 48 + BYTE nbBits; 49 + } nodeElt; 50 + 51 + 52 + /* ************************************************************** 53 + * Debug Traces 54 + ****************************************************************/ 55 + 56 + #if DEBUGLEVEL >= 2 57 + 58 + static size_t showU32(const U32* arr, size_t size) 46 59 { 47 - return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); 60 + size_t u; 61 + for (u=0; u<size; u++) { 62 + RAWLOG(6, " %u", arr[u]); (void)arr; 63 + } 64 + RAWLOG(6, " \n"); 65 + return size; 48 66 } 67 + 68 + static size_t HUF_getNbBits(HUF_CElt elt); 69 + 70 + static size_t showCTableBits(const HUF_CElt* ctable, size_t size) 71 + { 72 + size_t u; 73 + for (u=0; u<size; u++) { 74 + RAWLOG(6, " %zu", HUF_getNbBits(ctable[u])); (void)ctable; 75 + 
} 76 + RAWLOG(6, " \n"); 77 + return size; 78 + 79 + } 80 + 81 + static size_t showHNodeSymbols(const nodeElt* hnode, size_t size) 82 + { 83 + size_t u; 84 + for (u=0; u<size; u++) { 85 + RAWLOG(6, " %u", hnode[u].byte); (void)hnode; 86 + } 87 + RAWLOG(6, " \n"); 88 + return size; 89 + } 90 + 91 + static size_t showHNodeBits(const nodeElt* hnode, size_t size) 92 + { 93 + size_t u; 94 + for (u=0; u<size; u++) { 95 + RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode; 96 + } 97 + RAWLOG(6, " \n"); 98 + return size; 99 + } 100 + 101 + #endif 49 102 50 103 51 104 /* ******************************************************* ··· 141 86 S16 norm[HUF_TABLELOG_MAX+1]; 142 87 } HUF_CompressWeightsWksp; 143 88 144 - static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize) 89 + static size_t 90 + HUF_compressWeights(void* dst, size_t dstSize, 91 + const void* weightTable, size_t wtSize, 92 + void* workspace, size_t workspaceSize) 145 93 { 146 94 BYTE* const ostart = (BYTE*) dst; 147 95 BYTE* op = ostart; ··· 195 137 196 138 static size_t HUF_getValue(HUF_CElt elt) 197 139 { 198 - return elt & ~0xFF; 140 + return elt & ~(size_t)0xFF; 199 141 } 200 142 201 143 static size_t HUF_getValueFast(HUF_CElt elt) ··· 218 160 } 219 161 } 220 162 163 + HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable) 164 + { 165 + HUF_CTableHeader header; 166 + ZSTD_memcpy(&header, ctable, sizeof(header)); 167 + return header; 168 + } 169 + 170 + static void HUF_writeCTableHeader(HUF_CElt* ctable, U32 tableLog, U32 maxSymbolValue) 171 + { 172 + HUF_CTableHeader header; 173 + HUF_STATIC_ASSERT(sizeof(ctable[0]) == sizeof(header)); 174 + ZSTD_memset(&header, 0, sizeof(header)); 175 + assert(tableLog < 256); 176 + header.tableLog = (BYTE)tableLog; 177 + assert(maxSymbolValue < 256); 178 + header.maxSymbolValue = (BYTE)maxSymbolValue; 179 + ZSTD_memcpy(ctable, &header, sizeof(header)); 180 + } 181 + 221 182 typedef struct 
{ 222 183 HUF_CompressWeightsWksp wksp; 223 184 BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ ··· 251 174 BYTE* op = (BYTE*)dst; 252 175 U32 n; 253 176 HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32)); 177 + 178 + HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp)); 179 + 180 + assert(HUF_readCTableHeader(CTable).maxSymbolValue == maxSymbolValue); 181 + assert(HUF_readCTableHeader(CTable).tableLog == huffLog); 254 182 255 183 /* check conditions */ 256 184 if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC); ··· 286 204 return ((maxSymbolValue+1)/2) + 1; 287 205 } 288 206 289 - /*! HUF_writeCTable() : 290 - `CTable` : Huffman tree to save, using huf representation. 291 - @return : size of saved CTable */ 292 - size_t HUF_writeCTable (void* dst, size_t maxDstSize, 293 - const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog) 294 - { 295 - HUF_WriteCTableWksp wksp; 296 - return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp)); 297 - } 298 - 299 207 300 208 size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights) 301 209 { ··· 303 231 if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); 304 232 if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); 305 233 306 - CTable[0] = tableLog; 234 + *maxSymbolValuePtr = nbSymbols - 1; 235 + 236 + HUF_writeCTableHeader(CTable, tableLog, *maxSymbolValuePtr); 307 237 308 238 /* Prepare base value per rank */ 309 239 { U32 n, nextRankStart = 0; ··· 337 263 { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); } 338 264 } 339 265 340 - *maxSymbolValuePtr = nbSymbols - 1; 341 266 return readSize; 342 267 } 343 268 344 269 U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue) 345 270 
{ 346 - const HUF_CElt* ct = CTable + 1; 271 + const HUF_CElt* const ct = CTable + 1; 347 272 assert(symbolValue <= HUF_SYMBOLVALUE_MAX); 273 + if (symbolValue > HUF_readCTableHeader(CTable).maxSymbolValue) 274 + return 0; 348 275 return (U32)HUF_getNbBits(ct[symbolValue]); 349 276 } 350 277 351 278 352 - typedef struct nodeElt_s { 353 - U32 count; 354 - U16 parent; 355 - BYTE byte; 356 - BYTE nbBits; 357 - } nodeElt; 358 - 359 279 /* 360 280 * HUF_setMaxHeight(): 361 - * Enforces maxNbBits on the Huffman tree described in huffNode. 281 + * Try to enforce @targetNbBits on the Huffman tree described in @huffNode. 362 282 * 363 - * It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts 364 - * the tree to so that it is a valid canonical Huffman tree. 283 + * It attempts to convert all nodes with nbBits > @targetNbBits 284 + * to employ @targetNbBits instead. Then it adjusts the tree 285 + * so that it remains a valid canonical Huffman tree. 365 286 * 366 287 * @pre The sum of the ranks of each symbol == 2^largestBits, 367 288 * where largestBits == huffNode[lastNonNull].nbBits. 368 289 * @post The sum of the ranks of each symbol == 2^largestBits, 369 - * where largestBits is the return value <= maxNbBits. 290 + * where largestBits is the return value (expected <= targetNbBits). 370 291 * 371 - * @param huffNode The Huffman tree modified in place to enforce maxNbBits. 292 + * @param huffNode The Huffman tree modified in place to enforce targetNbBits. 293 + * It's presumed sorted, from most frequent to rarest symbol. 372 294 * @param lastNonNull The symbol with the lowest count in the Huffman tree. 373 - * @param maxNbBits The maximum allowed number of bits, which the Huffman tree 295 + * @param targetNbBits The allowed number of bits, which the Huffman tree 374 296 * may not respect. After this function the Huffman tree will 375 - * respect maxNbBits. 
376 - * @return The maximum number of bits of the Huffman tree after adjustment, 377 - * necessarily no more than maxNbBits. 297 + * respect targetNbBits. 298 + * @return The maximum number of bits of the Huffman tree after adjustment. 378 299 */ 379 - static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) 300 + static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits) 380 301 { 381 302 const U32 largestBits = huffNode[lastNonNull].nbBits; 382 - /* early exit : no elt > maxNbBits, so the tree is already valid. */ 383 - if (largestBits <= maxNbBits) return largestBits; 303 + /* early exit : no elt > targetNbBits, so the tree is already valid. */ 304 + if (largestBits <= targetNbBits) return largestBits; 305 + 306 + DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits); 384 307 385 308 /* there are several too large elements (at least >= 2) */ 386 309 { int totalCost = 0; 387 - const U32 baseCost = 1 << (largestBits - maxNbBits); 310 + const U32 baseCost = 1 << (largestBits - targetNbBits); 388 311 int n = (int)lastNonNull; 389 312 390 - /* Adjust any ranks > maxNbBits to maxNbBits. 313 + /* Adjust any ranks > targetNbBits to targetNbBits. 391 314 * Compute totalCost, which is how far the sum of the ranks is 392 315 * we are over 2^largestBits after adjust the offending ranks. 
393 316 */ 394 - while (huffNode[n].nbBits > maxNbBits) { 317 + while (huffNode[n].nbBits > targetNbBits) { 395 318 totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); 396 - huffNode[n].nbBits = (BYTE)maxNbBits; 319 + huffNode[n].nbBits = (BYTE)targetNbBits; 397 320 n--; 398 321 } 399 - /* n stops at huffNode[n].nbBits <= maxNbBits */ 400 - assert(huffNode[n].nbBits <= maxNbBits); 401 - /* n end at index of smallest symbol using < maxNbBits */ 402 - while (huffNode[n].nbBits == maxNbBits) --n; 322 + /* n stops at huffNode[n].nbBits <= targetNbBits */ 323 + assert(huffNode[n].nbBits <= targetNbBits); 324 + /* n end at index of smallest symbol using < targetNbBits */ 325 + while (huffNode[n].nbBits == targetNbBits) --n; 403 326 404 - /* renorm totalCost from 2^largestBits to 2^maxNbBits 327 + /* renorm totalCost from 2^largestBits to 2^targetNbBits 405 328 * note : totalCost is necessarily a multiple of baseCost */ 406 - assert((totalCost & (baseCost - 1)) == 0); 407 - totalCost >>= (largestBits - maxNbBits); 329 + assert(((U32)totalCost & (baseCost - 1)) == 0); 330 + totalCost >>= (largestBits - targetNbBits); 408 331 assert(totalCost > 0); 409 332 410 333 /* repay normalized cost */ ··· 410 339 411 340 /* Get pos of last (smallest = lowest cum. count) symbol per rank */ 412 341 ZSTD_memset(rankLast, 0xF0, sizeof(rankLast)); 413 - { U32 currentNbBits = maxNbBits; 342 + { U32 currentNbBits = targetNbBits; 414 343 int pos; 415 344 for (pos=n ; pos >= 0; pos--) { 416 345 if (huffNode[pos].nbBits >= currentNbBits) continue; 417 - currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ 418 - rankLast[maxNbBits-currentNbBits] = (U32)pos; 346 + currentNbBits = huffNode[pos].nbBits; /* < targetNbBits */ 347 + rankLast[targetNbBits-currentNbBits] = (U32)pos; 419 348 } } 420 349 421 350 while (totalCost > 0) { 422 351 /* Try to reduce the next power of 2 above totalCost because we 423 352 * gain back half the rank. 
424 353 */ 425 - U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1; 354 + U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1; 426 355 for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { 427 356 U32 const highPos = rankLast[nBitsToDecrease]; 428 357 U32 const lowPos = rankLast[nBitsToDecrease-1]; ··· 462 391 rankLast[nBitsToDecrease] = noSymbol; 463 392 else { 464 393 rankLast[nBitsToDecrease]--; 465 - if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) 394 + if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease) 466 395 rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ 467 396 } 468 397 } /* while (totalCost > 0) */ ··· 474 403 * TODO. 475 404 */ 476 405 while (totalCost < 0) { /* Sometimes, cost correction overshoot */ 477 - /* special case : no rank 1 symbol (using maxNbBits-1); 478 - * let's create one from largest rank 0 (using maxNbBits). 406 + /* special case : no rank 1 symbol (using targetNbBits-1); 407 + * let's create one from largest rank 0 (using targetNbBits). 479 408 */ 480 409 if (rankLast[1] == noSymbol) { 481 - while (huffNode[n].nbBits == maxNbBits) n--; 410 + while (huffNode[n].nbBits == targetNbBits) n--; 482 411 huffNode[n+1].nbBits--; 483 412 assert(n >= 0); 484 413 rankLast[1] = (U32)(n+1); ··· 492 421 } /* repay normalized cost */ 493 422 } /* there are several too large elements (at least >= 2) */ 494 423 495 - return maxNbBits; 424 + return targetNbBits; 496 425 } 497 426 498 427 typedef struct { ··· 500 429 U16 curr; 501 430 } rankPos; 502 431 503 - typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; 432 + typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)]; 504 433 505 434 /* Number of buckets available for HUF_sort() */ 506 435 #define RANK_POSITION_TABLE_SIZE 192 ··· 519 448 * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing. 
520 449 */ 521 450 #define RANK_POSITION_MAX_COUNT_LOG 32 522 - #define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */ 523 - #define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */ 451 + #define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */) 452 + #define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */) 524 453 525 454 /* Return the appropriate bucket index for a given count. See definition of 526 455 * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy. ··· 528 457 static U32 HUF_getIndex(U32 const count) { 529 458 return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF) 530 459 ? count 531 - : BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN; 460 + : ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN; 532 461 } 533 462 534 463 /* Helper swap function for HUF_quickSortPartition() */ ··· 651 580 652 581 /* Sort each bucket. */ 653 582 for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) { 654 - U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base; 583 + int const bucketSize = rankPosition[n].curr - rankPosition[n].base; 655 584 U32 const bucketStartIdx = rankPosition[n].base; 656 585 if (bucketSize > 1) { 657 586 assert(bucketStartIdx < maxSymbolValue1); ··· 661 590 662 591 assert(HUF_isSorted(huffNode, maxSymbolValue1)); 663 592 } 593 + 664 594 665 595 /* HUF_buildCTable_wksp() : 666 596 * Same as HUF_buildCTable(), but using externally allocated scratch buffer. 
··· 683 611 int lowS, lowN; 684 612 int nodeNb = STARTNODE; 685 613 int n, nodeRoot; 614 + DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1); 686 615 /* init for parents */ 687 616 nonNullRank = (int)maxSymbolValue; 688 617 while(huffNode[nonNullRank].count == 0) nonNullRank--; ··· 709 636 huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; 710 637 for (n=0; n<=nonNullRank; n++) 711 638 huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; 639 + 640 + DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1)); 712 641 713 642 return nonNullRank; 714 643 } ··· 746 671 HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */ 747 672 for (n=0; n<alphabetSize; n++) 748 673 HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); /* assign value within rank, symbol order */ 749 - CTable[0] = maxNbBits; 674 + 675 + HUF_writeCTableHeader(CTable, maxNbBits, maxSymbolValue); 750 676 } 751 677 752 - size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) 678 + size_t 679 + HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, 680 + void* workSpace, size_t wkspSize) 753 681 { 754 - HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32)); 682 + HUF_buildCTable_wksp_tables* const wksp_tables = 683 + (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32)); 755 684 nodeElt* const huffNode0 = wksp_tables->huffNodeTbl; 756 685 nodeElt* const huffNode = huffNode0+1; 757 686 int nonNullRank; 758 687 688 + HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables)); 689 + 690 + DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1); 691 + 759 692 /* safety 
checks */ 760 693 if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) 761 - return ERROR(workSpace_tooSmall); 694 + return ERROR(workSpace_tooSmall); 762 695 if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; 763 696 if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) 764 - return ERROR(maxSymbolValue_tooLarge); 697 + return ERROR(maxSymbolValue_tooLarge); 765 698 ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable)); 766 699 767 700 /* sort, decreasing order */ 768 701 HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition); 702 + DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1)); 769 703 770 704 /* build tree */ 771 705 nonNullRank = HUF_buildTree(huffNode, maxSymbolValue); 772 706 773 - /* enforce maxTableLog */ 707 + /* determine and enforce maxTableLog */ 774 708 maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits); 775 709 if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ 776 710 ··· 800 716 } 801 717 802 718 int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { 803 - HUF_CElt const* ct = CTable + 1; 804 - int bad = 0; 805 - int s; 806 - for (s = 0; s <= (int)maxSymbolValue; ++s) { 807 - bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0); 808 - } 809 - return !bad; 719 + HUF_CTableHeader header = HUF_readCTableHeader(CTable); 720 + HUF_CElt const* ct = CTable + 1; 721 + int bad = 0; 722 + int s; 723 + 724 + assert(header.tableLog <= HUF_TABLELOG_ABSOLUTEMAX); 725 + 726 + if (header.maxSymbolValue < maxSymbolValue) 727 + return 0; 728 + 729 + for (s = 0; s <= (int)maxSymbolValue; ++s) { 730 + bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0); 731 + } 732 + return !bad; 810 733 } 811 734 812 735 size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } ··· 895 804 #if DEBUGLEVEL >= 1 896 805 { 897 806 size_t const nbBits = HUF_getNbBits(elt); 898 - size_t const dirtyBits = nbBits == 0 ? 
0 : BIT_highbit32((U32)nbBits) + 1; 807 + size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1; 899 808 (void)dirtyBits; 900 809 /* Middle bits are 0. */ 901 810 assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0); ··· 975 884 { 976 885 size_t const nbBits = bitC->bitPos[0] & 0xFF; 977 886 if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ 978 - return (bitC->ptr - bitC->startPtr) + (nbBits > 0); 887 + return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0); 979 888 } 980 889 } 981 890 ··· 1055 964 const void* src, size_t srcSize, 1056 965 const HUF_CElt* CTable) 1057 966 { 1058 - U32 const tableLog = (U32)CTable[0]; 967 + U32 const tableLog = HUF_readCTableHeader(CTable).tableLog; 1059 968 HUF_CElt const* ct = CTable + 1; 1060 969 const BYTE* ip = (const BYTE*) src; 1061 970 BYTE* const ostart = (BYTE*)dst; 1062 971 BYTE* const oend = ostart + dstSize; 1063 - BYTE* op = ostart; 1064 972 HUF_CStream_t bitC; 1065 973 1066 974 /* init */ 1067 975 if (dstSize < 8) return 0; /* not enough space to compress */ 1068 - { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op)); 976 + { BYTE* op = ostart; 977 + size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op)); 1069 978 if (HUF_isError(initErr)) return 0; } 1070 979 1071 980 if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11) ··· 1136 1045 static size_t 1137 1046 HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, 1138 1047 const void* src, size_t srcSize, 1139 - const HUF_CElt* CTable, const int bmi2) 1048 + const HUF_CElt* CTable, const int flags) 1140 1049 { 1141 - if (bmi2) { 1050 + if (flags & HUF_flags_bmi2) { 1142 1051 return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); 1143 1052 } 1144 1053 return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); ··· 1149 1058 static size_t 1150 1059 HUF_compress1X_usingCTable_internal(void* dst, size_t 
dstSize, 1151 1060 const void* src, size_t srcSize, 1152 - const HUF_CElt* CTable, const int bmi2) 1061 + const HUF_CElt* CTable, const int flags) 1153 1062 { 1154 - (void)bmi2; 1063 + (void)flags; 1155 1064 return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); 1156 1065 } 1157 1066 1158 1067 #endif 1159 1068 1160 - size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) 1069 + size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags) 1161 1070 { 1162 - return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); 1163 - } 1164 - 1165 - size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2) 1166 - { 1167 - return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2); 1071 + return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags); 1168 1072 } 1169 1073 1170 1074 static size_t 1171 1075 HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, 1172 1076 const void* src, size_t srcSize, 1173 - const HUF_CElt* CTable, int bmi2) 1077 + const HUF_CElt* CTable, int flags) 1174 1078 { 1175 1079 size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ 1176 1080 const BYTE* ip = (const BYTE*) src; ··· 1179 1093 op += 6; /* jumpTable */ 1180 1094 1181 1095 assert(op <= oend); 1182 - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); 1096 + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) ); 1183 1097 if (cSize == 0 || cSize > 65535) return 0; 1184 1098 MEM_writeLE16(ostart, (U16)cSize); 1185 1099 op += cSize; ··· 1187 1101 1188 1102 ip += segmentSize; 1189 1103 assert(op <= oend); 1190 - { CHECK_V_F(cSize, 
HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); 1104 + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) ); 1191 1105 if (cSize == 0 || cSize > 65535) return 0; 1192 1106 MEM_writeLE16(ostart+2, (U16)cSize); 1193 1107 op += cSize; ··· 1195 1109 1196 1110 ip += segmentSize; 1197 1111 assert(op <= oend); 1198 - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); 1112 + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) ); 1199 1113 if (cSize == 0 || cSize > 65535) return 0; 1200 1114 MEM_writeLE16(ostart+4, (U16)cSize); 1201 1115 op += cSize; ··· 1204 1118 ip += segmentSize; 1205 1119 assert(op <= oend); 1206 1120 assert(ip <= iend); 1207 - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); 1121 + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) ); 1208 1122 if (cSize == 0 || cSize > 65535) return 0; 1209 1123 op += cSize; 1210 1124 } ··· 1212 1126 return (size_t)(op-ostart); 1213 1127 } 1214 1128 1215 - size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) 1129 + size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags) 1216 1130 { 1217 - return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); 1218 - } 1219 - 1220 - size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2) 1221 - { 1222 - return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2); 1131 + return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags); 1223 1132 
} 1224 1133 1225 1134 typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; ··· 1222 1141 static size_t HUF_compressCTable_internal( 1223 1142 BYTE* const ostart, BYTE* op, BYTE* const oend, 1224 1143 const void* src, size_t srcSize, 1225 - HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) 1144 + HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags) 1226 1145 { 1227 1146 size_t const cSize = (nbStreams==HUF_singleStream) ? 1228 - HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) : 1229 - HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2); 1147 + HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) : 1148 + HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags); 1230 1149 if (HUF_isError(cSize)) { return cSize; } 1231 1150 if (cSize==0) { return 0; } /* uncompressible */ 1232 1151 op += cSize; ··· 1249 1168 #define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096 1250 1169 #define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */ 1251 1170 1171 + unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue) 1172 + { 1173 + unsigned cardinality = 0; 1174 + unsigned i; 1175 + 1176 + for (i = 0; i < maxSymbolValue + 1; i++) { 1177 + if (count[i] != 0) cardinality += 1; 1178 + } 1179 + 1180 + return cardinality; 1181 + } 1182 + 1183 + unsigned HUF_minTableLog(unsigned symbolCardinality) 1184 + { 1185 + U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1; 1186 + return minBitsSymbols; 1187 + } 1188 + 1189 + unsigned HUF_optimalTableLog( 1190 + unsigned maxTableLog, 1191 + size_t srcSize, 1192 + unsigned maxSymbolValue, 1193 + void* workSpace, size_t wkspSize, 1194 + HUF_CElt* table, 1195 + const unsigned* count, 1196 + int flags) 1197 + { 1198 + assert(srcSize > 1); /* Not supported, RLE should be used instead */ 1199 + assert(wkspSize >= 
sizeof(HUF_buildCTable_wksp_tables)); 1200 + 1201 + if (!(flags & HUF_flags_optimalDepth)) { 1202 + /* cheap evaluation, based on FSE */ 1203 + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); 1204 + } 1205 + 1206 + { BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp); 1207 + size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp); 1208 + size_t hSize, newSize; 1209 + const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue); 1210 + const unsigned minTableLog = HUF_minTableLog(symbolCardinality); 1211 + size_t optSize = ((size_t) ~0) - 1; 1212 + unsigned optLog = maxTableLog, optLogGuess; 1213 + 1214 + DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize); 1215 + 1216 + /* Search until size increases */ 1217 + for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) { 1218 + DEBUGLOG(7, "checking for huffLog=%u", optLogGuess); 1219 + 1220 + { size_t maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize); 1221 + if (ERR_isError(maxBits)) continue; 1222 + 1223 + if (maxBits < optLogGuess && optLogGuess > minTableLog) break; 1224 + 1225 + hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize); 1226 + } 1227 + 1228 + if (ERR_isError(hSize)) continue; 1229 + 1230 + newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize; 1231 + 1232 + if (newSize > optSize + 1) { 1233 + break; 1234 + } 1235 + 1236 + if (newSize < optSize) { 1237 + optSize = newSize; 1238 + optLog = optLogGuess; 1239 + } 1240 + } 1241 + assert(optLog <= HUF_TABLELOG_MAX); 1242 + return optLog; 1243 + } 1244 + } 1245 + 1252 1246 /* HUF_compress_internal() : 1253 1247 * `workSpace_align4` must be aligned on 4-bytes boundaries, 1254 1248 * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */ ··· 1333 1177 unsigned maxSymbolValue, unsigned huffLog, 1334 1178 HUF_nbStreams_e nbStreams, 1335 
1179 void* workSpace, size_t wkspSize, 1336 - HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, 1337 - const int bmi2, unsigned suspectUncompressible) 1180 + HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags) 1338 1181 { 1339 1182 HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t)); 1340 1183 BYTE* const ostart = (BYTE*)dst; 1341 1184 BYTE* const oend = ostart + dstSize; 1342 1185 BYTE* op = ostart; 1343 1186 1187 + DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize); 1344 1188 HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE); 1345 1189 1346 1190 /* checks & inits */ ··· 1354 1198 if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; 1355 1199 1356 1200 /* Heuristic : If old table is valid, use it for small inputs */ 1357 - if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { 1201 + if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) { 1358 1202 return HUF_compressCTable_internal(ostart, op, oend, 1359 1203 src, srcSize, 1360 - nbStreams, oldHufTable, bmi2); 1204 + nbStreams, oldHufTable, flags); 1361 1205 } 1362 1206 1363 1207 /* If uncompressible data is suspected, do a smaller sampling first */ 1364 1208 DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2); 1365 - if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) { 1209 + if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) { 1366 1210 size_t largestTotal = 0; 1211 + DEBUGLOG(5, "input suspected incompressible : sampling to check"); 1367 1212 { unsigned maxSymbolValueBegin = maxSymbolValue; 1368 1213 CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) ); 1369 1214 largestTotal += largestBegin; ··· 1381 1224 if (largest 
== srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ 1382 1225 if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ 1383 1226 } 1227 + DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1)); 1384 1228 1385 1229 /* Check validity of previous table */ 1386 1230 if ( repeat ··· 1390 1232 *repeat = HUF_repeat_none; 1391 1233 } 1392 1234 /* Heuristic : use existing table for small inputs */ 1393 - if (preferRepeat && repeat && *repeat != HUF_repeat_none) { 1235 + if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) { 1394 1236 return HUF_compressCTable_internal(ostart, op, oend, 1395 1237 src, srcSize, 1396 - nbStreams, oldHufTable, bmi2); 1238 + nbStreams, oldHufTable, flags); 1397 1239 } 1398 1240 1399 1241 /* Build Huffman Tree */ 1400 - huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); 1242 + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags); 1401 1243 { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, 1402 1244 maxSymbolValue, huffLog, 1403 1245 &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp)); 1404 1246 CHECK_F(maxBits); 1405 1247 huffLog = (U32)maxBits; 1406 - } 1407 - /* Zero unused symbols in CTable, so we can check it for validity */ 1408 - { 1409 - size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue); 1410 - size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt); 1411 - ZSTD_memset(table->CTable + ctableSize, 0, unusedSize); 1248 + DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1)); 1412 1249 } 1413 1250 1414 1251 /* Write table description header */ ··· 1416 1263 if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { 1417 1264 return HUF_compressCTable_internal(ostart, op, oend, 1418 
1265 src, srcSize, 1419 - nbStreams, oldHufTable, bmi2); 1266 + nbStreams, oldHufTable, flags); 1420 1267 } } 1421 1268 1422 1269 /* Use the new huffman table */ ··· 1428 1275 } 1429 1276 return HUF_compressCTable_internal(ostart, op, oend, 1430 1277 src, srcSize, 1431 - nbStreams, table->CTable, bmi2); 1432 - } 1433 - 1434 - 1435 - size_t HUF_compress1X_wksp (void* dst, size_t dstSize, 1436 - const void* src, size_t srcSize, 1437 - unsigned maxSymbolValue, unsigned huffLog, 1438 - void* workSpace, size_t wkspSize) 1439 - { 1440 - return HUF_compress_internal(dst, dstSize, src, srcSize, 1441 - maxSymbolValue, huffLog, HUF_singleStream, 1442 - workSpace, wkspSize, 1443 - NULL, NULL, 0, 0 /*bmi2*/, 0); 1278 + nbStreams, table->CTable, flags); 1444 1279 } 1445 1280 1446 1281 size_t HUF_compress1X_repeat (void* dst, size_t dstSize, 1447 1282 const void* src, size_t srcSize, 1448 1283 unsigned maxSymbolValue, unsigned huffLog, 1449 1284 void* workSpace, size_t wkspSize, 1450 - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, 1451 - int bmi2, unsigned suspectUncompressible) 1285 + HUF_CElt* hufTable, HUF_repeat* repeat, int flags) 1452 1286 { 1287 + DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize); 1453 1288 return HUF_compress_internal(dst, dstSize, src, srcSize, 1454 1289 maxSymbolValue, huffLog, HUF_singleStream, 1455 1290 workSpace, wkspSize, hufTable, 1456 - repeat, preferRepeat, bmi2, suspectUncompressible); 1457 - } 1458 - 1459 - /* HUF_compress4X_repeat(): 1460 - * compress input using 4 streams. 
1461 - * provide workspace to generate compression tables */ 1462 - size_t HUF_compress4X_wksp (void* dst, size_t dstSize, 1463 - const void* src, size_t srcSize, 1464 - unsigned maxSymbolValue, unsigned huffLog, 1465 - void* workSpace, size_t wkspSize) 1466 - { 1467 - return HUF_compress_internal(dst, dstSize, src, srcSize, 1468 - maxSymbolValue, huffLog, HUF_fourStreams, 1469 - workSpace, wkspSize, 1470 - NULL, NULL, 0, 0 /*bmi2*/, 0); 1291 + repeat, flags); 1471 1292 } 1472 1293 1473 1294 /* HUF_compress4X_repeat(): 1474 1295 * compress input using 4 streams. 1475 1296 * consider skipping quickly 1476 - * re-use an existing huffman compression table */ 1297 + * reuse an existing huffman compression table */ 1477 1298 size_t HUF_compress4X_repeat (void* dst, size_t dstSize, 1478 1299 const void* src, size_t srcSize, 1479 1300 unsigned maxSymbolValue, unsigned huffLog, 1480 1301 void* workSpace, size_t wkspSize, 1481 - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible) 1302 + HUF_CElt* hufTable, HUF_repeat* repeat, int flags) 1482 1303 { 1304 + DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize); 1483 1305 return HUF_compress_internal(dst, dstSize, src, srcSize, 1484 1306 maxSymbolValue, huffLog, HUF_fourStreams, 1485 1307 workSpace, wkspSize, 1486 - hufTable, repeat, preferRepeat, bmi2, suspectUncompressible); 1308 + hufTable, repeat, flags); 1487 1309 } 1488 -
+2394 -889
lib/zstd/compress/zstd_compress.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 12 11 /*-************************************* 13 12 * Dependencies 14 13 ***************************************/ 14 + #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */ 15 15 #include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */ 16 16 #include "../common/mem.h" 17 + #include "../common/error_private.h" 17 18 #include "hist.h" /* HIST_countFast_wksp */ 18 19 #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ 19 20 #include "../common/fse.h" 20 - #define HUF_STATIC_LINKING_ONLY 21 21 #include "../common/huf.h" 22 22 #include "zstd_compress_internal.h" 23 23 #include "zstd_compress_sequences.h" ··· 29 27 #include "zstd_opt.h" 30 28 #include "zstd_ldm.h" 31 29 #include "zstd_compress_superblock.h" 30 + #include "../common/bits.h" /* ZSTD_highbit32, ZSTD_rotateRight_U64 */ 32 31 33 32 /* *************************************************************** 34 33 * Tuning parameters ··· 47 44 * in log format, aka 17 => 1 << 17 == 128Ki positions. 48 45 * This structure is only used in zstd_opt. 49 46 * Since allocation is centralized for all strategies, it has to be known here. 50 - * The actual (selected) size of the hash table is then stored in ZSTD_matchState_t.hashLog3, 47 + * The actual (selected) size of the hash table is then stored in ZSTD_MatchState_t.hashLog3, 51 48 * so that zstd_opt.c doesn't need to know about this constant. 52 49 */ 53 50 #ifndef ZSTD_HASHLOG3_MAX ··· 58 55 * Helper functions 59 56 ***************************************/ 60 57 /* ZSTD_compressBound() 61 - * Note that the result from this function is only compatible with the "normal" 62 - * full-block strategy. 
63 - * When there are a lot of small blocks due to frequent flush in streaming mode 64 - * the overhead of headers can make the compressed data to be larger than the 65 - * return value of ZSTD_compressBound(). 58 + * Note that the result from this function is only valid for 59 + * the one-pass compression functions. 60 + * When employing the streaming mode, 61 + * if flushes are frequently altering the size of blocks, 62 + * the overhead from block headers can make the compressed data larger 63 + * than the return value of ZSTD_compressBound(). 66 64 */ 67 65 size_t ZSTD_compressBound(size_t srcSize) { 68 - return ZSTD_COMPRESSBOUND(srcSize); 66 + size_t const r = ZSTD_COMPRESSBOUND(srcSize); 67 + if (r==0) return ERROR(srcSize_wrong); 68 + return r; 69 69 } 70 70 71 71 ··· 81 75 ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */ 82 76 U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ 83 77 ZSTD_cwksp workspace; 84 - ZSTD_matchState_t matchState; 78 + ZSTD_MatchState_t matchState; 85 79 ZSTD_compressedBlockState_t cBlockState; 86 80 ZSTD_customMem customMem; 87 81 U32 dictID; 88 82 int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ 89 - ZSTD_paramSwitch_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use 83 + ZSTD_ParamSwitch_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use 90 84 * row-based matchfinder. Unless the cdict is reloaded, we will use 91 85 * the same greedy/lazy matchfinder at compression time. 92 86 */ ··· 136 130 ZSTD_cwksp_move(&cctx->workspace, &ws); 137 131 cctx->staticSize = workspaceSize; 138 132 139 - /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */ 140 - if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; 133 + /* statically sized space. 
tmpWorkspace never moves (but prev/next block swap places) */ 134 + if (!ZSTD_cwksp_check_available(&cctx->workspace, TMP_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; 141 135 cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); 142 136 cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); 143 - cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE); 137 + cctx->tmpWorkspace = ZSTD_cwksp_reserve_object(&cctx->workspace, TMP_WORKSPACE_SIZE); 138 + cctx->tmpWkspSize = TMP_WORKSPACE_SIZE; 144 139 cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); 145 140 return cctx; 146 141 } ··· 175 168 176 169 size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) 177 170 { 171 + DEBUGLOG(3, "ZSTD_freeCCtx (address: %p)", (void*)cctx); 178 172 if (cctx==NULL) return 0; /* support free on NULL */ 179 173 RETURN_ERROR_IF(cctx->staticSize, memory_allocation, 180 174 "not compatible with static CCtx"); 181 - { 182 - int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx); 175 + { int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx); 183 176 ZSTD_freeCCtxContent(cctx); 184 - if (!cctxInWorkspace) { 185 - ZSTD_customFree(cctx, cctx->customMem); 186 - } 177 + if (!cctxInWorkspace) ZSTD_customFree(cctx, cctx->customMem); 187 178 } 188 179 return 0; 189 180 } ··· 210 205 } 211 206 212 207 /* private API call, for dictBuilder only */ 213 - const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } 208 + const SeqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } 214 209 215 210 /* Returns true if the strategy supports using a row based matchfinder */ 216 211 static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) { ··· 220 215 /* Returns true if the strategy and 
useRowMatchFinder mode indicate that we will use the row based matchfinder 221 216 * for this compression. 222 217 */ 223 - static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_paramSwitch_e mode) { 218 + static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_ParamSwitch_e mode) { 224 219 assert(mode != ZSTD_ps_auto); 225 220 return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_ps_enable); 226 221 } 227 222 228 223 /* Returns row matchfinder usage given an initial mode and cParams */ 229 - static ZSTD_paramSwitch_e ZSTD_resolveRowMatchFinderMode(ZSTD_paramSwitch_e mode, 224 + static ZSTD_ParamSwitch_e ZSTD_resolveRowMatchFinderMode(ZSTD_ParamSwitch_e mode, 230 225 const ZSTD_compressionParameters* const cParams) { 231 - #if defined(ZSTD_ARCH_X86_SSE2) || defined(ZSTD_ARCH_ARM_NEON) 232 - int const kHasSIMD128 = 1; 233 - #else 234 - int const kHasSIMD128 = 0; 235 - #endif 226 + /* The Linux Kernel does not use SIMD, and 128KB is a very common size, e.g. in BtrFS. 227 + * The row match finder is slower for this size without SIMD, so disable it. 
228 + */ 229 + const unsigned kWindowLogLowerBound = 17; 236 230 if (mode != ZSTD_ps_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */ 237 231 mode = ZSTD_ps_disable; 238 232 if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode; 239 - if (kHasSIMD128) { 240 - if (cParams->windowLog > 14) mode = ZSTD_ps_enable; 241 - } else { 242 - if (cParams->windowLog > 17) mode = ZSTD_ps_enable; 243 - } 233 + if (cParams->windowLog > kWindowLogLowerBound) mode = ZSTD_ps_enable; 244 234 return mode; 245 235 } 246 236 247 237 /* Returns block splitter usage (generally speaking, when using slower/stronger compression modes) */ 248 - static ZSTD_paramSwitch_e ZSTD_resolveBlockSplitterMode(ZSTD_paramSwitch_e mode, 238 + static ZSTD_ParamSwitch_e ZSTD_resolveBlockSplitterMode(ZSTD_ParamSwitch_e mode, 249 239 const ZSTD_compressionParameters* const cParams) { 250 240 if (mode != ZSTD_ps_auto) return mode; 251 241 return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17) ? ZSTD_ps_enable : ZSTD_ps_disable; ··· 248 248 249 249 /* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */ 250 250 static int ZSTD_allocateChainTable(const ZSTD_strategy strategy, 251 - const ZSTD_paramSwitch_e useRowMatchFinder, 251 + const ZSTD_ParamSwitch_e useRowMatchFinder, 252 252 const U32 forDDSDict) { 253 253 assert(useRowMatchFinder != ZSTD_ps_auto); 254 254 /* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate. ··· 257 257 return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder)); 258 258 } 259 259 260 - /* Returns 1 if compression parameters are such that we should 260 + /* Returns ZSTD_ps_enable if compression parameters are such that we should 261 261 * enable long distance matching (wlog >= 27, strategy >= btopt). 262 - * Returns 0 otherwise. 262 + * Returns ZSTD_ps_disable otherwise. 
263 263 */ 264 - static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode, 264 + static ZSTD_ParamSwitch_e ZSTD_resolveEnableLdm(ZSTD_ParamSwitch_e mode, 265 265 const ZSTD_compressionParameters* const cParams) { 266 266 if (mode != ZSTD_ps_auto) return mode; 267 267 return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable; 268 + } 269 + 270 + static int ZSTD_resolveExternalSequenceValidation(int mode) { 271 + return mode; 272 + } 273 + 274 + /* Resolves maxBlockSize to the default if no value is present. */ 275 + static size_t ZSTD_resolveMaxBlockSize(size_t maxBlockSize) { 276 + if (maxBlockSize == 0) { 277 + return ZSTD_BLOCKSIZE_MAX; 278 + } else { 279 + return maxBlockSize; 280 + } 281 + } 282 + 283 + static ZSTD_ParamSwitch_e ZSTD_resolveExternalRepcodeSearch(ZSTD_ParamSwitch_e value, int cLevel) { 284 + if (value != ZSTD_ps_auto) return value; 285 + if (cLevel < 10) { 286 + return ZSTD_ps_disable; 287 + } else { 288 + return ZSTD_ps_enable; 289 + } 290 + } 291 + 292 + /* Returns 1 if compression parameters are such that CDict hashtable and chaintable indices are tagged. 293 + * If so, the tags need to be removed in ZSTD_resetCCtx_byCopyingCDict. 
*/ 294 + static int ZSTD_CDictIndicesAreTagged(const ZSTD_compressionParameters* const cParams) { 295 + return cParams->strategy == ZSTD_fast || cParams->strategy == ZSTD_dfast; 268 296 } 269 297 270 298 static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( ··· 310 282 assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog); 311 283 assert(cctxParams.ldmParams.hashRateLog < 32); 312 284 } 313 - cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams); 285 + cctxParams.postBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.postBlockSplitter, &cParams); 314 286 cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams); 287 + cctxParams.validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams.validateSequences); 288 + cctxParams.maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams.maxBlockSize); 289 + cctxParams.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams.searchForExternalRepcodes, 290 + cctxParams.compressionLevel); 315 291 assert(!ZSTD_checkCParams(cParams)); 316 292 return cctxParams; 317 293 } ··· 361 329 #define ZSTD_NO_CLEVEL 0 362 330 363 331 /* 364 - * Initializes the cctxParams from params and compressionLevel. 332 + * Initializes `cctxParams` from `params` and `compressionLevel`. 365 333 * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL. 
366 334 */ 367 - static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel) 335 + static void 336 + ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, 337 + const ZSTD_parameters* params, 338 + int compressionLevel) 368 339 { 369 340 assert(!ZSTD_checkCParams(params->cParams)); 370 341 ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); ··· 378 343 */ 379 344 cctxParams->compressionLevel = compressionLevel; 380 345 cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams); 381 - cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, &params->cParams); 346 + cctxParams->postBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->postBlockSplitter, &params->cParams); 382 347 cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, &params->cParams); 348 + cctxParams->validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams->validateSequences); 349 + cctxParams->maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams->maxBlockSize); 350 + cctxParams->searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams->searchForExternalRepcodes, compressionLevel); 383 351 DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d", 384 - cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm); 352 + cctxParams->useRowMatchFinder, cctxParams->postBlockSplitter, cctxParams->ldmParams.enableLdm); 385 353 } 386 354 387 355 size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) ··· 397 359 398 360 /* 399 361 * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone. 400 - * @param param Validated zstd parameters. 362 + * @param params Validated zstd parameters. 
401 363 */ 402 364 static void ZSTD_CCtxParams_setZstdParams( 403 365 ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) ··· 493 455 return bounds; 494 456 495 457 case ZSTD_c_enableLongDistanceMatching: 496 - bounds.lowerBound = 0; 497 - bounds.upperBound = 1; 458 + bounds.lowerBound = (int)ZSTD_ps_auto; 459 + bounds.upperBound = (int)ZSTD_ps_disable; 498 460 return bounds; 499 461 500 462 case ZSTD_c_ldmHashLog: ··· 572 534 bounds.upperBound = 1; 573 535 return bounds; 574 536 575 - case ZSTD_c_useBlockSplitter: 537 + case ZSTD_c_splitAfterSequences: 576 538 bounds.lowerBound = (int)ZSTD_ps_auto; 577 539 bounds.upperBound = (int)ZSTD_ps_disable; 540 + return bounds; 541 + 542 + case ZSTD_c_blockSplitterLevel: 543 + bounds.lowerBound = 0; 544 + bounds.upperBound = ZSTD_BLOCKSPLITTER_LEVEL_MAX; 578 545 return bounds; 579 546 580 547 case ZSTD_c_useRowMatchFinder: ··· 590 547 case ZSTD_c_deterministicRefPrefix: 591 548 bounds.lowerBound = 0; 592 549 bounds.upperBound = 1; 550 + return bounds; 551 + 552 + case ZSTD_c_prefetchCDictTables: 553 + bounds.lowerBound = (int)ZSTD_ps_auto; 554 + bounds.upperBound = (int)ZSTD_ps_disable; 555 + return bounds; 556 + 557 + case ZSTD_c_enableSeqProducerFallback: 558 + bounds.lowerBound = 0; 559 + bounds.upperBound = 1; 560 + return bounds; 561 + 562 + case ZSTD_c_maxBlockSize: 563 + bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN; 564 + bounds.upperBound = ZSTD_BLOCKSIZE_MAX; 565 + return bounds; 566 + 567 + case ZSTD_c_repcodeResolution: 568 + bounds.lowerBound = (int)ZSTD_ps_auto; 569 + bounds.upperBound = (int)ZSTD_ps_disable; 593 570 return bounds; 594 571 595 572 default: ··· 630 567 return 0; 631 568 } 632 569 633 - #define BOUNDCHECK(cParam, val) { \ 634 - RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ 635 - parameter_outOfBound, "Param out of bounds"); \ 636 - } 570 + #define BOUNDCHECK(cParam, val) \ 571 + do { \ 572 + RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ 573 + parameter_outOfBound, 
"Param out of bounds"); \ 574 + } while (0) 637 575 638 576 639 577 static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) ··· 648 584 case ZSTD_c_minMatch: 649 585 case ZSTD_c_targetLength: 650 586 case ZSTD_c_strategy: 587 + case ZSTD_c_blockSplitterLevel: 651 588 return 1; 652 589 653 590 case ZSTD_c_format: ··· 675 610 case ZSTD_c_stableOutBuffer: 676 611 case ZSTD_c_blockDelimiters: 677 612 case ZSTD_c_validateSequences: 678 - case ZSTD_c_useBlockSplitter: 613 + case ZSTD_c_splitAfterSequences: 679 614 case ZSTD_c_useRowMatchFinder: 680 615 case ZSTD_c_deterministicRefPrefix: 616 + case ZSTD_c_prefetchCDictTables: 617 + case ZSTD_c_enableSeqProducerFallback: 618 + case ZSTD_c_maxBlockSize: 619 + case ZSTD_c_repcodeResolution: 681 620 default: 682 621 return 0; 683 622 } ··· 694 625 if (ZSTD_isUpdateAuthorized(param)) { 695 626 cctx->cParamsChanged = 1; 696 627 } else { 697 - RETURN_ERROR(stage_wrong, "can only set params in ctx init stage"); 628 + RETURN_ERROR(stage_wrong, "can only set params in cctx init stage"); 698 629 } } 699 630 700 631 switch(param) ··· 734 665 case ZSTD_c_stableOutBuffer: 735 666 case ZSTD_c_blockDelimiters: 736 667 case ZSTD_c_validateSequences: 737 - case ZSTD_c_useBlockSplitter: 668 + case ZSTD_c_splitAfterSequences: 669 + case ZSTD_c_blockSplitterLevel: 738 670 case ZSTD_c_useRowMatchFinder: 739 671 case ZSTD_c_deterministicRefPrefix: 672 + case ZSTD_c_prefetchCDictTables: 673 + case ZSTD_c_enableSeqProducerFallback: 674 + case ZSTD_c_maxBlockSize: 675 + case ZSTD_c_repcodeResolution: 740 676 break; 741 677 742 678 default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); ··· 797 723 case ZSTD_c_minMatch : 798 724 if (value!=0) /* 0 => use default */ 799 725 BOUNDCHECK(ZSTD_c_minMatch, value); 800 - CCtxParams->cParams.minMatch = value; 726 + CCtxParams->cParams.minMatch = (U32)value; 801 727 return CCtxParams->cParams.minMatch; 802 728 803 729 case ZSTD_c_targetLength : 804 730 BOUNDCHECK(ZSTD_c_targetLength, value); 
805 - CCtxParams->cParams.targetLength = value; 731 + CCtxParams->cParams.targetLength = (U32)value; 806 732 return CCtxParams->cParams.targetLength; 807 733 808 734 case ZSTD_c_strategy : ··· 815 741 /* Content size written in frame header _when known_ (default:1) */ 816 742 DEBUGLOG(4, "set content size flag = %u", (value!=0)); 817 743 CCtxParams->fParams.contentSizeFlag = value != 0; 818 - return CCtxParams->fParams.contentSizeFlag; 744 + return (size_t)CCtxParams->fParams.contentSizeFlag; 819 745 820 746 case ZSTD_c_checksumFlag : 821 747 /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */ 822 748 CCtxParams->fParams.checksumFlag = value != 0; 823 - return CCtxParams->fParams.checksumFlag; 749 + return (size_t)CCtxParams->fParams.checksumFlag; 824 750 825 751 case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ 826 752 DEBUGLOG(4, "set dictIDFlag = %u", (value!=0)); ··· 829 755 830 756 case ZSTD_c_forceMaxWindow : 831 757 CCtxParams->forceWindow = (value != 0); 832 - return CCtxParams->forceWindow; 758 + return (size_t)CCtxParams->forceWindow; 833 759 834 760 case ZSTD_c_forceAttachDict : { 835 761 const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value; 836 - BOUNDCHECK(ZSTD_c_forceAttachDict, pref); 762 + BOUNDCHECK(ZSTD_c_forceAttachDict, (int)pref); 837 763 CCtxParams->attachDictPref = pref; 838 764 return CCtxParams->attachDictPref; 839 765 } 840 766 841 767 case ZSTD_c_literalCompressionMode : { 842 - const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value; 843 - BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); 768 + const ZSTD_ParamSwitch_e lcm = (ZSTD_ParamSwitch_e)value; 769 + BOUNDCHECK(ZSTD_c_literalCompressionMode, (int)lcm); 844 770 CCtxParams->literalCompressionMode = lcm; 845 771 return CCtxParams->literalCompressionMode; 846 772 } ··· 863 789 864 790 case ZSTD_c_enableDedicatedDictSearch : 865 791 CCtxParams->enableDedicatedDictSearch = 
(value!=0); 866 - return CCtxParams->enableDedicatedDictSearch; 792 + return (size_t)CCtxParams->enableDedicatedDictSearch; 867 793 868 794 case ZSTD_c_enableLongDistanceMatching : 869 - CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value; 795 + BOUNDCHECK(ZSTD_c_enableLongDistanceMatching, value); 796 + CCtxParams->ldmParams.enableLdm = (ZSTD_ParamSwitch_e)value; 870 797 return CCtxParams->ldmParams.enableLdm; 871 798 872 799 case ZSTD_c_ldmHashLog : 873 800 if (value!=0) /* 0 ==> auto */ 874 801 BOUNDCHECK(ZSTD_c_ldmHashLog, value); 875 - CCtxParams->ldmParams.hashLog = value; 802 + CCtxParams->ldmParams.hashLog = (U32)value; 876 803 return CCtxParams->ldmParams.hashLog; 877 804 878 805 case ZSTD_c_ldmMinMatch : 879 806 if (value!=0) /* 0 ==> default */ 880 807 BOUNDCHECK(ZSTD_c_ldmMinMatch, value); 881 - CCtxParams->ldmParams.minMatchLength = value; 808 + CCtxParams->ldmParams.minMatchLength = (U32)value; 882 809 return CCtxParams->ldmParams.minMatchLength; 883 810 884 811 case ZSTD_c_ldmBucketSizeLog : 885 812 if (value!=0) /* 0 ==> default */ 886 813 BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value); 887 - CCtxParams->ldmParams.bucketSizeLog = value; 814 + CCtxParams->ldmParams.bucketSizeLog = (U32)value; 888 815 return CCtxParams->ldmParams.bucketSizeLog; 889 816 890 817 case ZSTD_c_ldmHashRateLog : 891 818 if (value!=0) /* 0 ==> default */ 892 819 BOUNDCHECK(ZSTD_c_ldmHashRateLog, value); 893 - CCtxParams->ldmParams.hashRateLog = value; 820 + CCtxParams->ldmParams.hashRateLog = (U32)value; 894 821 return CCtxParams->ldmParams.hashRateLog; 895 822 896 823 case ZSTD_c_targetCBlockSize : 897 - if (value!=0) /* 0 ==> default */ 824 + if (value!=0) { /* 0 ==> default */ 825 + value = MAX(value, ZSTD_TARGETCBLOCKSIZE_MIN); 898 826 BOUNDCHECK(ZSTD_c_targetCBlockSize, value); 899 - CCtxParams->targetCBlockSize = value; 827 + } 828 + CCtxParams->targetCBlockSize = (U32)value; 900 829 return CCtxParams->targetCBlockSize; 901 830 902 831 case ZSTD_c_srcSizeHint : 903 
832 if (value!=0) /* 0 ==> default */ 904 833 BOUNDCHECK(ZSTD_c_srcSizeHint, value); 905 834 CCtxParams->srcSizeHint = value; 906 - return CCtxParams->srcSizeHint; 835 + return (size_t)CCtxParams->srcSizeHint; 907 836 908 837 case ZSTD_c_stableInBuffer: 909 838 BOUNDCHECK(ZSTD_c_stableInBuffer, value); ··· 920 843 921 844 case ZSTD_c_blockDelimiters: 922 845 BOUNDCHECK(ZSTD_c_blockDelimiters, value); 923 - CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value; 846 + CCtxParams->blockDelimiters = (ZSTD_SequenceFormat_e)value; 924 847 return CCtxParams->blockDelimiters; 925 848 926 849 case ZSTD_c_validateSequences: 927 850 BOUNDCHECK(ZSTD_c_validateSequences, value); 928 851 CCtxParams->validateSequences = value; 929 - return CCtxParams->validateSequences; 852 + return (size_t)CCtxParams->validateSequences; 930 853 931 - case ZSTD_c_useBlockSplitter: 932 - BOUNDCHECK(ZSTD_c_useBlockSplitter, value); 933 - CCtxParams->useBlockSplitter = (ZSTD_paramSwitch_e)value; 934 - return CCtxParams->useBlockSplitter; 854 + case ZSTD_c_splitAfterSequences: 855 + BOUNDCHECK(ZSTD_c_splitAfterSequences, value); 856 + CCtxParams->postBlockSplitter = (ZSTD_ParamSwitch_e)value; 857 + return CCtxParams->postBlockSplitter; 858 + 859 + case ZSTD_c_blockSplitterLevel: 860 + BOUNDCHECK(ZSTD_c_blockSplitterLevel, value); 861 + CCtxParams->preBlockSplitter_level = value; 862 + return (size_t)CCtxParams->preBlockSplitter_level; 935 863 936 864 case ZSTD_c_useRowMatchFinder: 937 865 BOUNDCHECK(ZSTD_c_useRowMatchFinder, value); 938 - CCtxParams->useRowMatchFinder = (ZSTD_paramSwitch_e)value; 866 + CCtxParams->useRowMatchFinder = (ZSTD_ParamSwitch_e)value; 939 867 return CCtxParams->useRowMatchFinder; 940 868 941 869 case ZSTD_c_deterministicRefPrefix: 942 870 BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value); 943 871 CCtxParams->deterministicRefPrefix = !!value; 944 - return CCtxParams->deterministicRefPrefix; 872 + return (size_t)CCtxParams->deterministicRefPrefix; 873 + 874 + case 
ZSTD_c_prefetchCDictTables: 875 + BOUNDCHECK(ZSTD_c_prefetchCDictTables, value); 876 + CCtxParams->prefetchCDictTables = (ZSTD_ParamSwitch_e)value; 877 + return CCtxParams->prefetchCDictTables; 878 + 879 + case ZSTD_c_enableSeqProducerFallback: 880 + BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value); 881 + CCtxParams->enableMatchFinderFallback = value; 882 + return (size_t)CCtxParams->enableMatchFinderFallback; 883 + 884 + case ZSTD_c_maxBlockSize: 885 + if (value!=0) /* 0 ==> default */ 886 + BOUNDCHECK(ZSTD_c_maxBlockSize, value); 887 + assert(value>=0); 888 + CCtxParams->maxBlockSize = (size_t)value; 889 + return CCtxParams->maxBlockSize; 890 + 891 + case ZSTD_c_repcodeResolution: 892 + BOUNDCHECK(ZSTD_c_repcodeResolution, value); 893 + CCtxParams->searchForExternalRepcodes = (ZSTD_ParamSwitch_e)value; 894 + return CCtxParams->searchForExternalRepcodes; 945 895 946 896 default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); 947 897 } ··· 985 881 switch(param) 986 882 { 987 883 case ZSTD_c_format : 988 - *value = CCtxParams->format; 884 + *value = (int)CCtxParams->format; 989 885 break; 990 886 case ZSTD_c_compressionLevel : 991 887 *value = CCtxParams->compressionLevel; ··· 1000 896 *value = (int)CCtxParams->cParams.chainLog; 1001 897 break; 1002 898 case ZSTD_c_searchLog : 1003 - *value = CCtxParams->cParams.searchLog; 899 + *value = (int)CCtxParams->cParams.searchLog; 1004 900 break; 1005 901 case ZSTD_c_minMatch : 1006 - *value = CCtxParams->cParams.minMatch; 902 + *value = (int)CCtxParams->cParams.minMatch; 1007 903 break; 1008 904 case ZSTD_c_targetLength : 1009 - *value = CCtxParams->cParams.targetLength; 905 + *value = (int)CCtxParams->cParams.targetLength; 1010 906 break; 1011 907 case ZSTD_c_strategy : 1012 - *value = (unsigned)CCtxParams->cParams.strategy; 908 + *value = (int)CCtxParams->cParams.strategy; 1013 909 break; 1014 910 case ZSTD_c_contentSizeFlag : 1015 911 *value = CCtxParams->fParams.contentSizeFlag; ··· 1024 920 *value = 
CCtxParams->forceWindow; 1025 921 break; 1026 922 case ZSTD_c_forceAttachDict : 1027 - *value = CCtxParams->attachDictPref; 923 + *value = (int)CCtxParams->attachDictPref; 1028 924 break; 1029 925 case ZSTD_c_literalCompressionMode : 1030 - *value = CCtxParams->literalCompressionMode; 926 + *value = (int)CCtxParams->literalCompressionMode; 1031 927 break; 1032 928 case ZSTD_c_nbWorkers : 1033 929 assert(CCtxParams->nbWorkers == 0); ··· 1043 939 *value = CCtxParams->enableDedicatedDictSearch; 1044 940 break; 1045 941 case ZSTD_c_enableLongDistanceMatching : 1046 - *value = CCtxParams->ldmParams.enableLdm; 942 + *value = (int)CCtxParams->ldmParams.enableLdm; 1047 943 break; 1048 944 case ZSTD_c_ldmHashLog : 1049 - *value = CCtxParams->ldmParams.hashLog; 945 + *value = (int)CCtxParams->ldmParams.hashLog; 1050 946 break; 1051 947 case ZSTD_c_ldmMinMatch : 1052 - *value = CCtxParams->ldmParams.minMatchLength; 948 + *value = (int)CCtxParams->ldmParams.minMatchLength; 1053 949 break; 1054 950 case ZSTD_c_ldmBucketSizeLog : 1055 - *value = CCtxParams->ldmParams.bucketSizeLog; 951 + *value = (int)CCtxParams->ldmParams.bucketSizeLog; 1056 952 break; 1057 953 case ZSTD_c_ldmHashRateLog : 1058 - *value = CCtxParams->ldmParams.hashRateLog; 954 + *value = (int)CCtxParams->ldmParams.hashRateLog; 1059 955 break; 1060 956 case ZSTD_c_targetCBlockSize : 1061 957 *value = (int)CCtxParams->targetCBlockSize; ··· 1075 971 case ZSTD_c_validateSequences : 1076 972 *value = (int)CCtxParams->validateSequences; 1077 973 break; 1078 - case ZSTD_c_useBlockSplitter : 1079 - *value = (int)CCtxParams->useBlockSplitter; 974 + case ZSTD_c_splitAfterSequences : 975 + *value = (int)CCtxParams->postBlockSplitter; 976 + break; 977 + case ZSTD_c_blockSplitterLevel : 978 + *value = CCtxParams->preBlockSplitter_level; 1080 979 break; 1081 980 case ZSTD_c_useRowMatchFinder : 1082 981 *value = (int)CCtxParams->useRowMatchFinder; 1083 982 break; 1084 983 case ZSTD_c_deterministicRefPrefix: 1085 984 *value = 
(int)CCtxParams->deterministicRefPrefix; 985 + break; 986 + case ZSTD_c_prefetchCDictTables: 987 + *value = (int)CCtxParams->prefetchCDictTables; 988 + break; 989 + case ZSTD_c_enableSeqProducerFallback: 990 + *value = CCtxParams->enableMatchFinderFallback; 991 + break; 992 + case ZSTD_c_maxBlockSize: 993 + *value = (int)CCtxParams->maxBlockSize; 994 + break; 995 + case ZSTD_c_repcodeResolution: 996 + *value = (int)CCtxParams->searchForExternalRepcodes; 1086 997 break; 1087 998 default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); 1088 999 } ··· 1125 1006 return 0; 1126 1007 } 1127 1008 1009 + size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams) 1010 + { 1011 + ZSTD_STATIC_ASSERT(sizeof(cparams) == 7 * 4 /* all params are listed below */); 1012 + DEBUGLOG(4, "ZSTD_CCtx_setCParams"); 1013 + /* only update if all parameters are valid */ 1014 + FORWARD_IF_ERROR(ZSTD_checkCParams(cparams), ""); 1015 + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, (int)cparams.windowLog), ""); 1016 + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_chainLog, (int)cparams.chainLog), ""); 1017 + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, (int)cparams.hashLog), ""); 1018 + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_searchLog, (int)cparams.searchLog), ""); 1019 + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, (int)cparams.minMatch), ""); 1020 + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetLength, (int)cparams.targetLength), ""); 1021 + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, (int)cparams.strategy), ""); 1022 + return 0; 1023 + } 1024 + 1025 + size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams) 1026 + { 1027 + ZSTD_STATIC_ASSERT(sizeof(fparams) == 3 * 4 /* all params are listed below */); 1028 + DEBUGLOG(4, "ZSTD_CCtx_setFParams"); 1029 + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 
fparams.contentSizeFlag != 0), ""); 1030 + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, fparams.checksumFlag != 0), ""); 1031 + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_dictIDFlag, fparams.noDictIDFlag == 0), ""); 1032 + return 0; 1033 + } 1034 + 1035 + size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params) 1036 + { 1037 + DEBUGLOG(4, "ZSTD_CCtx_setParams"); 1038 + /* First check cParams, because we want to update all or none. */ 1039 + FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); 1040 + /* Next set fParams, because this could fail if the cctx isn't in init stage. */ 1041 + FORWARD_IF_ERROR(ZSTD_CCtx_setFParams(cctx, params.fParams), ""); 1042 + /* Finally set cParams, which should succeed. */ 1043 + FORWARD_IF_ERROR(ZSTD_CCtx_setCParams(cctx, params.cParams), ""); 1044 + return 0; 1045 + } 1046 + 1128 1047 size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) 1129 1048 { 1130 - DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); 1049 + DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %llu bytes", pledgedSrcSize); 1131 1050 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, 1132 1051 "Can't set pledgedSrcSize when not in init stage."); 1133 1052 cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; ··· 1181 1024 ZSTD_compressionParameters* cParams); 1182 1025 1183 1026 /* 1184 - * Initializes the local dict using the requested parameters. 1185 - * NOTE: This does not use the pledged src size, because it may be used for more 1186 - * than one compression. 1027 + * Initializes the local dictionary using requested parameters. 1028 + * NOTE: Initialization does not employ the pledged src size, 1029 + * because the dictionary may be used for multiple compressions. 
1187 1030 */ 1188 1031 static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) 1189 1032 { ··· 1196 1039 return 0; 1197 1040 } 1198 1041 if (dl->cdict != NULL) { 1199 - assert(cctx->cdict == dl->cdict); 1200 1042 /* Local dictionary already initialized. */ 1043 + assert(cctx->cdict == dl->cdict); 1201 1044 return 0; 1202 1045 } 1203 1046 assert(dl->dictSize > 0); ··· 1217 1060 } 1218 1061 1219 1062 size_t ZSTD_CCtx_loadDictionary_advanced( 1220 - ZSTD_CCtx* cctx, const void* dict, size_t dictSize, 1221 - ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) 1063 + ZSTD_CCtx* cctx, 1064 + const void* dict, size_t dictSize, 1065 + ZSTD_dictLoadMethod_e dictLoadMethod, 1066 + ZSTD_dictContentType_e dictContentType) 1222 1067 { 1223 - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, 1224 - "Can't load a dictionary when ctx is not in init stage."); 1225 1068 DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); 1226 - ZSTD_clearAllDicts(cctx); /* in case one already exists */ 1227 - if (dict == NULL || dictSize == 0) /* no dictionary mode */ 1069 + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, 1070 + "Can't load a dictionary when cctx is not in init stage."); 1071 + ZSTD_clearAllDicts(cctx); /* erase any previously set dictionary */ 1072 + if (dict == NULL || dictSize == 0) /* no dictionary */ 1228 1073 return 0; 1229 1074 if (dictLoadMethod == ZSTD_dlm_byRef) { 1230 1075 cctx->localDict.dict = dict; 1231 1076 } else { 1077 + /* copy dictionary content inside CCtx to own its lifetime */ 1232 1078 void* dictBuffer; 1233 1079 RETURN_ERROR_IF(cctx->staticSize, memory_allocation, 1234 - "no malloc for static CCtx"); 1080 + "static CCtx can't allocate for an internal copy of dictionary"); 1235 1081 dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem); 1236 - RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!"); 1082 + RETURN_ERROR_IF(dictBuffer==NULL, memory_allocation, 1083 + "allocation 
failed for dictionary content"); 1237 1084 ZSTD_memcpy(dictBuffer, dict, dictSize); 1238 - cctx->localDict.dictBuffer = dictBuffer; 1239 - cctx->localDict.dict = dictBuffer; 1085 + cctx->localDict.dictBuffer = dictBuffer; /* owned ptr to free */ 1086 + cctx->localDict.dict = dictBuffer; /* read-only reference */ 1240 1087 } 1241 1088 cctx->localDict.dictSize = dictSize; 1242 1089 cctx->localDict.dictContentType = dictContentType; ··· 1310 1149 if ( (reset == ZSTD_reset_parameters) 1311 1150 || (reset == ZSTD_reset_session_and_parameters) ) { 1312 1151 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, 1313 - "Can't reset parameters only when not in init stage."); 1152 + "Reset parameters is only possible during init stage."); 1314 1153 ZSTD_clearAllDicts(cctx); 1315 1154 return ZSTD_CCtxParams_reset(&cctx->requestedParams); 1316 1155 } ··· 1329 1168 BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog); 1330 1169 BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch); 1331 1170 BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength); 1332 - BOUNDCHECK(ZSTD_c_strategy, cParams.strategy); 1171 + BOUNDCHECK(ZSTD_c_strategy, (int)cParams.strategy); 1333 1172 return 0; 1334 1173 } 1335 1174 ··· 1339 1178 static ZSTD_compressionParameters 1340 1179 ZSTD_clampCParams(ZSTD_compressionParameters cParams) 1341 1180 { 1342 - # define CLAMP_TYPE(cParam, val, type) { \ 1343 - ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \ 1344 - if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \ 1345 - else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \ 1346 - } 1181 + # define CLAMP_TYPE(cParam, val, type) \ 1182 + do { \ 1183 + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \ 1184 + if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \ 1185 + else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \ 1186 + } while (0) 1347 1187 # define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned) 1348 1188 
CLAMP(ZSTD_c_windowLog, cParams.windowLog); 1349 1189 CLAMP(ZSTD_c_chainLog, cParams.chainLog); ··· 1402 1240 * optimize `cPar` for a specified input (`srcSize` and `dictSize`). 1403 1241 * mostly downsize to reduce memory consumption and initialization latency. 1404 1242 * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known. 1405 - * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`. 1243 + * `mode` is the mode for parameter adjustment. See docs for `ZSTD_CParamMode_e`. 1406 1244 * note : `srcSize==0` means 0! 1407 1245 * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */ 1408 1246 static ZSTD_compressionParameters 1409 1247 ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, 1410 1248 unsigned long long srcSize, 1411 1249 size_t dictSize, 1412 - ZSTD_cParamMode_e mode) 1250 + ZSTD_CParamMode_e mode, 1251 + ZSTD_ParamSwitch_e useRowMatchFinder) 1413 1252 { 1414 1253 const U64 minSrcSize = 513; /* (1<<9) + 1 */ 1415 1254 const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); 1416 1255 assert(ZSTD_checkCParams(cPar)==0); 1256 + 1257 + /* Cascade the selected strategy down to the next-highest one built into 1258 + * this binary. 
*/ 1259 + #ifdef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR 1260 + if (cPar.strategy == ZSTD_btultra2) { 1261 + cPar.strategy = ZSTD_btultra; 1262 + } 1263 + if (cPar.strategy == ZSTD_btultra) { 1264 + cPar.strategy = ZSTD_btopt; 1265 + } 1266 + #endif 1267 + #ifdef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR 1268 + if (cPar.strategy == ZSTD_btopt) { 1269 + cPar.strategy = ZSTD_btlazy2; 1270 + } 1271 + #endif 1272 + #ifdef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR 1273 + if (cPar.strategy == ZSTD_btlazy2) { 1274 + cPar.strategy = ZSTD_lazy2; 1275 + } 1276 + #endif 1277 + #ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR 1278 + if (cPar.strategy == ZSTD_lazy2) { 1279 + cPar.strategy = ZSTD_lazy; 1280 + } 1281 + #endif 1282 + #ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR 1283 + if (cPar.strategy == ZSTD_lazy) { 1284 + cPar.strategy = ZSTD_greedy; 1285 + } 1286 + #endif 1287 + #ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR 1288 + if (cPar.strategy == ZSTD_greedy) { 1289 + cPar.strategy = ZSTD_dfast; 1290 + } 1291 + #endif 1292 + #ifdef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR 1293 + if (cPar.strategy == ZSTD_dfast) { 1294 + cPar.strategy = ZSTD_fast; 1295 + cPar.targetLength = 0; 1296 + } 1297 + #endif 1417 1298 1418 1299 switch (mode) { 1419 1300 case ZSTD_cpm_unknown: ··· 1486 1281 } 1487 1282 1488 1283 /* resize windowLog if input is small enough, to use less memory */ 1489 - if ( (srcSize < maxWindowResize) 1490 - && (dictSize < maxWindowResize) ) { 1284 + if ( (srcSize <= maxWindowResize) 1285 + && (dictSize <= maxWindowResize) ) { 1491 1286 U32 const tSize = (U32)(srcSize + dictSize); 1492 1287 static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; 1493 1288 U32 const srcLog = (tSize < hashSizeMin) ? 
ZSTD_HASHLOG_MIN : ··· 1505 1300 if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) 1506 1301 cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */ 1507 1302 1303 + /* We can't use more than 32 bits of hash in total, so that means that we require: 1304 + * (hashLog + 8) <= 32 && (chainLog + 8) <= 32 1305 + */ 1306 + if (mode == ZSTD_cpm_createCDict && ZSTD_CDictIndicesAreTagged(&cPar)) { 1307 + U32 const maxShortCacheHashLog = 32 - ZSTD_SHORT_CACHE_TAG_BITS; 1308 + if (cPar.hashLog > maxShortCacheHashLog) { 1309 + cPar.hashLog = maxShortCacheHashLog; 1310 + } 1311 + if (cPar.chainLog > maxShortCacheHashLog) { 1312 + cPar.chainLog = maxShortCacheHashLog; 1313 + } 1314 + } 1315 + 1316 + 1317 + /* At this point, we aren't 100% sure if we are using the row match finder. 1318 + * Unless it is explicitly disabled, conservatively assume that it is enabled. 1319 + * In this case it will only be disabled for small sources, so shrinking the 1320 + * hash log a little bit shouldn't result in any ratio loss. 1321 + */ 1322 + if (useRowMatchFinder == ZSTD_ps_auto) 1323 + useRowMatchFinder = ZSTD_ps_enable; 1324 + 1325 + /* We can't hash more than 32-bits in total. 
So that means that we require: 1326 + * (hashLog - rowLog + 8) <= 32 1327 + */ 1328 + if (ZSTD_rowMatchFinderUsed(cPar.strategy, useRowMatchFinder)) { 1329 + /* Switch to 32-entry rows if searchLog is 5 (or more) */ 1330 + U32 const rowLog = BOUNDED(4, cPar.searchLog, 6); 1331 + U32 const maxRowHashLog = 32 - ZSTD_ROW_HASH_TAG_BITS; 1332 + U32 const maxHashLog = maxRowHashLog + rowLog; 1333 + assert(cPar.hashLog >= rowLog); 1334 + if (cPar.hashLog > maxHashLog) { 1335 + cPar.hashLog = maxHashLog; 1336 + } 1337 + } 1338 + 1508 1339 return cPar; 1509 1340 } 1510 1341 ··· 1551 1310 { 1552 1311 cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ 1553 1312 if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; 1554 - return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown); 1313 + return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown, ZSTD_ps_auto); 1555 1314 } 1556 1315 1557 - static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); 1558 - static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); 1316 + static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode); 1317 + static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode); 1559 1318 1560 1319 static void ZSTD_overrideCParams( 1561 1320 ZSTD_compressionParameters* cParams, ··· 1571 1330 } 1572 1331 1573 1332 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( 1574 - const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) 1333 + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode) 1575 
1334 { 1576 1335 ZSTD_compressionParameters cParams; 1577 1336 if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) { 1578 - srcSizeHint = CCtxParams->srcSizeHint; 1337 + assert(CCtxParams->srcSizeHint>=0); 1338 + srcSizeHint = (U64)CCtxParams->srcSizeHint; 1579 1339 } 1580 1340 cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode); 1581 1341 if (CCtxParams->ldmParams.enableLdm == ZSTD_ps_enable) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; 1582 1342 ZSTD_overrideCParams(&cParams, &CCtxParams->cParams); 1583 1343 assert(!ZSTD_checkCParams(cParams)); 1584 1344 /* srcSizeHint == 0 means 0 */ 1585 - return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode); 1345 + return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode, CCtxParams->useRowMatchFinder); 1586 1346 } 1587 1347 1588 1348 static size_t 1589 1349 ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, 1590 - const ZSTD_paramSwitch_e useRowMatchFinder, 1591 - const U32 enableDedicatedDictSearch, 1350 + const ZSTD_ParamSwitch_e useRowMatchFinder, 1351 + const int enableDedicatedDictSearch, 1592 1352 const U32 forCCtx) 1593 1353 { 1594 1354 /* chain table size should be 0 for fast or row-hash strategies */ ··· 1605 1363 + hSize * sizeof(U32) 1606 1364 + h3Size * sizeof(U32); 1607 1365 size_t const optPotentialSpace = 1608 - ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32)) 1609 - + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32)) 1610 - + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32)) 1611 - + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32)) 1612 - + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)) 1613 - + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); 1366 + ZSTD_cwksp_aligned64_alloc_size((MaxML+1) * sizeof(U32)) 1367 + + ZSTD_cwksp_aligned64_alloc_size((MaxLL+1) * sizeof(U32)) 1368 + + 
ZSTD_cwksp_aligned64_alloc_size((MaxOff+1) * sizeof(U32)) 1369 + + ZSTD_cwksp_aligned64_alloc_size((1<<Litbits) * sizeof(U32)) 1370 + + ZSTD_cwksp_aligned64_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_match_t)) 1371 + + ZSTD_cwksp_aligned64_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t)); 1614 1372 size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder) 1615 - ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16)) 1373 + ? ZSTD_cwksp_aligned64_alloc_size(hSize) 1616 1374 : 0; 1617 1375 size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt)) 1618 1376 ? optPotentialSpace ··· 1628 1386 return tableSpace + optSpace + slackSpace + lazyAdditionalSpace; 1629 1387 } 1630 1388 1389 + /* Helper function for calculating memory requirements. 1390 + * Gives a tighter bound than ZSTD_sequenceBound() by taking minMatch into account. */ 1391 + static size_t ZSTD_maxNbSeq(size_t blockSize, unsigned minMatch, int useSequenceProducer) { 1392 + U32 const divider = (minMatch==3 || useSequenceProducer) ? 3 : 4; 1393 + return blockSize / divider; 1394 + } 1395 + 1631 1396 static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( 1632 1397 const ZSTD_compressionParameters* cParams, 1633 1398 const ldmParams_t* ldmParams, 1634 1399 const int isStatic, 1635 - const ZSTD_paramSwitch_e useRowMatchFinder, 1400 + const ZSTD_ParamSwitch_e useRowMatchFinder, 1636 1401 const size_t buffInSize, 1637 1402 const size_t buffOutSize, 1638 - const U64 pledgedSrcSize) 1403 + const U64 pledgedSrcSize, 1404 + int useSequenceProducer, 1405 + size_t maxBlockSize) 1639 1406 { 1640 1407 size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize); 1641 - size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); 1642 - U32 const divider = (cParams->minMatch==3) ? 
3 : 4; 1643 - size_t const maxNbSeq = blockSize / divider; 1408 + size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(maxBlockSize), windowSize); 1409 + size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useSequenceProducer); 1644 1410 size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) 1645 - + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef)) 1411 + + ZSTD_cwksp_aligned64_alloc_size(maxNbSeq * sizeof(SeqDef)) 1646 1412 + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); 1647 - size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE); 1413 + size_t const tmpWorkSpace = ZSTD_cwksp_alloc_size(TMP_WORKSPACE_SIZE); 1648 1414 size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); 1649 1415 size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1); 1650 1416 1651 1417 size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams); 1652 1418 size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize); 1653 1419 size_t const ldmSeqSpace = ldmParams->enableLdm == ZSTD_ps_enable ? 1654 - ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0; 1420 + ZSTD_cwksp_aligned64_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0; 1655 1421 1656 1422 1657 1423 size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) ··· 1667 1417 1668 1418 size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; 1669 1419 1420 + size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize); 1421 + size_t const externalSeqSpace = useSequenceProducer 1422 + ? 
ZSTD_cwksp_aligned64_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence)) 1423 + : 0; 1424 + 1670 1425 size_t const neededSpace = 1671 1426 cctxSpace + 1672 - entropySpace + 1427 + tmpWorkSpace + 1673 1428 blockStateSpace + 1674 1429 ldmSpace + 1675 1430 ldmSeqSpace + 1676 1431 matchStateSize + 1677 1432 tokenSpace + 1678 - bufferSpace; 1433 + bufferSpace + 1434 + externalSeqSpace; 1679 1435 1680 1436 DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); 1681 1437 return neededSpace; ··· 1691 1435 { 1692 1436 ZSTD_compressionParameters const cParams = 1693 1437 ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); 1694 - ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, 1438 + ZSTD_ParamSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, 1695 1439 &cParams); 1696 1440 1697 1441 RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); ··· 1699 1443 * be needed. However, we still allocate two 0-sized buffers, which can 1700 1444 * take space under ASAN. 
*/ 1701 1445 return ZSTD_estimateCCtxSize_usingCCtxParams_internal( 1702 - &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN); 1446 + &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize); 1703 1447 } 1704 1448 1705 1449 size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) ··· 1749 1493 RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); 1750 1494 { ZSTD_compressionParameters const cParams = 1751 1495 ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); 1752 - size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); 1496 + size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(params->maxBlockSize), (size_t)1 << cParams.windowLog); 1753 1497 size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered) 1754 1498 ? ((size_t)1 << cParams.windowLog) + blockSize 1755 1499 : 0; 1756 1500 size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered) 1757 1501 ? ZSTD_compressBound(blockSize) + 1 1758 1502 : 0; 1759 - ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, &params->cParams); 1503 + ZSTD_ParamSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, &params->cParams); 1760 1504 1761 1505 return ZSTD_estimateCCtxSize_usingCCtxParams_internal( 1762 1506 &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize, 1763 - ZSTD_CONTENTSIZE_UNKNOWN); 1507 + ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize); 1764 1508 } 1765 1509 } 1766 1510 ··· 1856 1600 * Invalidate all the matches in the match finder tables. 1857 1601 * Requires nextSrc and base to be set (can be NULL). 
1858 1602 */ 1859 - static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) 1603 + static void ZSTD_invalidateMatchState(ZSTD_MatchState_t* ms) 1860 1604 { 1861 1605 ZSTD_window_clear(&ms->window); 1862 1606 ··· 1893 1637 ZSTD_resetTarget_CCtx 1894 1638 } ZSTD_resetTarget_e; 1895 1639 1640 + /* Mixes bits in a 64 bits in a value, based on XXH3_rrmxmx */ 1641 + static U64 ZSTD_bitmix(U64 val, U64 len) { 1642 + val ^= ZSTD_rotateRight_U64(val, 49) ^ ZSTD_rotateRight_U64(val, 24); 1643 + val *= 0x9FB21C651E98DF25ULL; 1644 + val ^= (val >> 35) + len ; 1645 + val *= 0x9FB21C651E98DF25ULL; 1646 + return val ^ (val >> 28); 1647 + } 1648 + 1649 + /* Mixes in the hashSalt and hashSaltEntropy to create a new hashSalt */ 1650 + static void ZSTD_advanceHashSalt(ZSTD_MatchState_t* ms) { 1651 + ms->hashSalt = ZSTD_bitmix(ms->hashSalt, 8) ^ ZSTD_bitmix((U64) ms->hashSaltEntropy, 4); 1652 + } 1896 1653 1897 1654 static size_t 1898 - ZSTD_reset_matchState(ZSTD_matchState_t* ms, 1655 + ZSTD_reset_matchState(ZSTD_MatchState_t* ms, 1899 1656 ZSTD_cwksp* ws, 1900 1657 const ZSTD_compressionParameters* cParams, 1901 - const ZSTD_paramSwitch_e useRowMatchFinder, 1658 + const ZSTD_ParamSwitch_e useRowMatchFinder, 1902 1659 const ZSTD_compResetPolicy_e crp, 1903 1660 const ZSTD_indexResetPolicy_e forceResetIndex, 1904 1661 const ZSTD_resetTarget_e forWho) ··· 1933 1664 } 1934 1665 1935 1666 ms->hashLog3 = hashLog3; 1667 + ms->lazySkipping = 0; 1936 1668 1937 1669 ZSTD_invalidateMatchState(ms); 1938 1670 ··· 1955 1685 ZSTD_cwksp_clean_tables(ws); 1956 1686 } 1957 1687 1958 - /* opt parser space */ 1959 - if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { 1960 - DEBUGLOG(4, "reserving optimal parser space"); 1961 - ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned)); 1962 - ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned)); 1963 - ms->opt.matchLengthFreq = 
(unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned)); 1964 - ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned)); 1965 - ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)); 1966 - ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); 1967 - } 1968 - 1969 1688 if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) { 1970 - { /* Row match finder needs an additional table of hashes ("tags") */ 1971 - size_t const tagTableSize = hSize*sizeof(U16); 1972 - ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize); 1973 - if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize); 1689 + /* Row match finder needs an additional table of hashes ("tags") */ 1690 + size_t const tagTableSize = hSize; 1691 + /* We want to generate a new salt in case we reset a Cctx, but we always want to use 1692 + * 0 when we reset a Cdict */ 1693 + if(forWho == ZSTD_resetTarget_CCtx) { 1694 + ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned_init_once(ws, tagTableSize); 1695 + ZSTD_advanceHashSalt(ms); 1696 + } else { 1697 + /* When we are not salting we want to always memset the memory */ 1698 + ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned64(ws, tagTableSize); 1699 + ZSTD_memset(ms->tagTable, 0, tagTableSize); 1700 + ms->hashSalt = 0; 1974 1701 } 1975 1702 { /* Switch to 32-entry rows if searchLog is 5 (or more) */ 1976 1703 U32 const rowLog = BOUNDED(4, cParams->searchLog, 6); 1977 1704 assert(cParams->hashLog >= rowLog); 1978 1705 ms->rowHashLog = cParams->hashLog - rowLog; 1979 1706 } 1707 + } 1708 + 1709 + /* opt parser space */ 1710 + if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { 1711 + DEBUGLOG(4, "reserving optimal parser space"); 1712 + ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned64(ws, (1<<Litbits) * sizeof(unsigned)); 1713 + 
ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned64(ws, (MaxLL+1) * sizeof(unsigned)); 1714 + ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned64(ws, (MaxML+1) * sizeof(unsigned)); 1715 + ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned64(ws, (MaxOff+1) * sizeof(unsigned)); 1716 + ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned64(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_match_t)); 1717 + ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned64(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t)); 1980 1718 } 1981 1719 1982 1720 ms->cParams = *cParams; ··· 2032 1754 { 2033 1755 ZSTD_cwksp* const ws = &zc->workspace; 2034 1756 DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d useBlockSplitter=%d", 2035 - (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->useBlockSplitter); 1757 + (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->postBlockSplitter); 2036 1758 assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); 2037 1759 2038 1760 zc->isFirstBlock = 1; ··· 2044 1766 params = &zc->appliedParams; 2045 1767 2046 1768 assert(params->useRowMatchFinder != ZSTD_ps_auto); 2047 - assert(params->useBlockSplitter != ZSTD_ps_auto); 1769 + assert(params->postBlockSplitter != ZSTD_ps_auto); 2048 1770 assert(params->ldmParams.enableLdm != ZSTD_ps_auto); 1771 + assert(params->maxBlockSize != 0); 2049 1772 if (params->ldmParams.enableLdm == ZSTD_ps_enable) { 2050 1773 /* Adjust long distance matching parameters */ 2051 1774 ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, &params->cParams); ··· 2055 1776 } 2056 1777 2057 1778 { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize)); 2058 - size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); 2059 - U32 const divider = (params->cParams.minMatch==3) ? 
3 : 4; 2060 - size_t const maxNbSeq = blockSize / divider; 1779 + size_t const blockSize = MIN(params->maxBlockSize, windowSize); 1780 + size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, ZSTD_hasExtSeqProd(params)); 2061 1781 size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered) 2062 1782 ? ZSTD_compressBound(blockSize) + 1 2063 1783 : 0; ··· 2073 1795 size_t const neededSpace = 2074 1796 ZSTD_estimateCCtxSize_usingCCtxParams_internal( 2075 1797 &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder, 2076 - buffInSize, buffOutSize, pledgedSrcSize); 2077 - int resizeWorkspace; 1798 + buffInSize, buffOutSize, pledgedSrcSize, ZSTD_hasExtSeqProd(params), params->maxBlockSize); 2078 1799 2079 1800 FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!"); 2080 1801 ··· 2082 1805 { /* Check if workspace is large enough, alloc a new one if needed */ 2083 1806 int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; 2084 1807 int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace); 2085 - resizeWorkspace = workspaceTooSmall || workspaceWasteful; 1808 + int resizeWorkspace = workspaceTooSmall || workspaceWasteful; 2086 1809 DEBUGLOG(4, "Need %zu B workspace", neededSpace); 2087 1810 DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); 2088 1811 ··· 2100 1823 2101 1824 DEBUGLOG(5, "reserving object space"); 2102 1825 /* Statically sized space. 
2103 - * entropyWorkspace never moves, 1826 + * tmpWorkspace never moves, 2104 1827 * though prev/next block swap places */ 2105 1828 assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t))); 2106 1829 zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); 2107 1830 RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock"); 2108 1831 zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); 2109 1832 RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock"); 2110 - zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE); 2111 - RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace"); 1833 + zc->tmpWorkspace = ZSTD_cwksp_reserve_object(ws, TMP_WORKSPACE_SIZE); 1834 + RETURN_ERROR_IF(zc->tmpWorkspace == NULL, memory_allocation, "couldn't allocate tmpWorkspace"); 1835 + zc->tmpWkspSize = TMP_WORKSPACE_SIZE; 2112 1836 } } 2113 1837 2114 1838 ZSTD_cwksp_clear(ws); 2115 1839 2116 1840 /* init params */ 2117 1841 zc->blockState.matchState.cParams = params->cParams; 1842 + zc->blockState.matchState.prefetchCDictTables = params->prefetchCDictTables == ZSTD_ps_enable; 2118 1843 zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; 2119 1844 zc->consumedSrcSize = 0; 2120 1845 zc->producedCSize = 0; ··· 2124 1845 zc->appliedParams.fParams.contentSizeFlag = 0; 2125 1846 DEBUGLOG(4, "pledged content size : %u ; flag : %u", 2126 1847 (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag); 2127 - zc->blockSize = blockSize; 1848 + zc->blockSizeMax = blockSize; 2128 1849 2129 1850 xxh64_reset(&zc->xxhState, 0); 2130 1851 zc->stage = ZSTDcs_init; ··· 2133 1854 2134 1855 ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); 2135 1856 1857 + 
FORWARD_IF_ERROR(ZSTD_reset_matchState( 1858 + &zc->blockState.matchState, 1859 + ws, 1860 + &params->cParams, 1861 + params->useRowMatchFinder, 1862 + crp, 1863 + needsIndexReset, 1864 + ZSTD_resetTarget_CCtx), ""); 1865 + 1866 + zc->seqStore.sequencesStart = (SeqDef*)ZSTD_cwksp_reserve_aligned64(ws, maxNbSeq * sizeof(SeqDef)); 1867 + 1868 + /* ldm hash table */ 1869 + if (params->ldmParams.enableLdm == ZSTD_ps_enable) { 1870 + /* TODO: avoid memset? */ 1871 + size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog; 1872 + zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned64(ws, ldmHSize * sizeof(ldmEntry_t)); 1873 + ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); 1874 + zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned64(ws, maxNbLdmSeq * sizeof(rawSeq)); 1875 + zc->maxNbLdmSequences = maxNbLdmSeq; 1876 + 1877 + ZSTD_window_init(&zc->ldmState.window); 1878 + zc->ldmState.loadedDictEnd = 0; 1879 + } 1880 + 1881 + /* reserve space for block-level external sequences */ 1882 + if (ZSTD_hasExtSeqProd(params)) { 1883 + size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize); 1884 + zc->extSeqBufCapacity = maxNbExternalSeq; 1885 + zc->extSeqBuf = 1886 + (ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned64(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence)); 1887 + } 1888 + 1889 + /* buffers */ 1890 + 2136 1891 /* ZSTD_wildcopy() is used to copy into the literals buffer, 2137 1892 * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. 
2138 1893 */ 2139 1894 zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH); 2140 1895 zc->seqStore.maxNbLit = blockSize; 2141 1896 2142 - /* buffers */ 2143 1897 zc->bufferedPolicy = zbuff; 2144 1898 zc->inBuffSize = buffInSize; 2145 1899 zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize); ··· 2195 1883 zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); 2196 1884 zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); 2197 1885 zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); 2198 - zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef)); 2199 - 2200 - FORWARD_IF_ERROR(ZSTD_reset_matchState( 2201 - &zc->blockState.matchState, 2202 - ws, 2203 - &params->cParams, 2204 - params->useRowMatchFinder, 2205 - crp, 2206 - needsIndexReset, 2207 - ZSTD_resetTarget_CCtx), ""); 2208 - 2209 - /* ldm hash table */ 2210 - if (params->ldmParams.enableLdm == ZSTD_ps_enable) { 2211 - /* TODO: avoid memset? 
*/ 2212 - size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog; 2213 - zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t)); 2214 - ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); 2215 - zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); 2216 - zc->maxNbLdmSequences = maxNbLdmSeq; 2217 - 2218 - ZSTD_window_init(&zc->ldmState.window); 2219 - zc->ldmState.loadedDictEnd = 0; 2220 - } 2221 1886 2222 1887 DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); 2223 - assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace)); 1888 + assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace)); 2224 1889 2225 1890 zc->initialized = 1; 2226 1891 ··· 2269 1980 } 2270 1981 2271 1982 params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, 2272 - cdict->dictContentSize, ZSTD_cpm_attachDict); 1983 + cdict->dictContentSize, ZSTD_cpm_attachDict, 1984 + params.useRowMatchFinder); 2273 1985 params.cParams.windowLog = windowLog; 2274 1986 params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */ 2275 1987 FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize, ··· 2309 2019 return 0; 2310 2020 } 2311 2021 2022 + static void ZSTD_copyCDictTableIntoCCtx(U32* dst, U32 const* src, size_t tableSize, 2023 + ZSTD_compressionParameters const* cParams) { 2024 + if (ZSTD_CDictIndicesAreTagged(cParams)){ 2025 + /* Remove tags from the CDict table if they are present. 2026 + * See docs on "short cache" in zstd_compress_internal.h for context. 
*/ 2027 + size_t i; 2028 + for (i = 0; i < tableSize; i++) { 2029 + U32 const taggedIndex = src[i]; 2030 + U32 const index = taggedIndex >> ZSTD_SHORT_CACHE_TAG_BITS; 2031 + dst[i] = index; 2032 + } 2033 + } else { 2034 + ZSTD_memcpy(dst, src, tableSize * sizeof(U32)); 2035 + } 2036 + } 2037 + 2312 2038 static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, 2313 2039 const ZSTD_CDict* cdict, 2314 2040 ZSTD_CCtx_params params, ··· 2360 2054 : 0; 2361 2055 size_t const hSize = (size_t)1 << cdict_cParams->hashLog; 2362 2056 2363 - ZSTD_memcpy(cctx->blockState.matchState.hashTable, 2364 - cdict->matchState.hashTable, 2365 - hSize * sizeof(U32)); 2057 + ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.hashTable, 2058 + cdict->matchState.hashTable, 2059 + hSize, cdict_cParams); 2060 + 2366 2061 /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */ 2367 2062 if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) { 2368 - ZSTD_memcpy(cctx->blockState.matchState.chainTable, 2369 - cdict->matchState.chainTable, 2370 - chainSize * sizeof(U32)); 2063 + ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.chainTable, 2064 + cdict->matchState.chainTable, 2065 + chainSize, cdict_cParams); 2371 2066 } 2372 2067 /* copy tag table */ 2373 2068 if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) { 2374 - size_t const tagTableSize = hSize*sizeof(U16); 2069 + size_t const tagTableSize = hSize; 2375 2070 ZSTD_memcpy(cctx->blockState.matchState.tagTable, 2376 - cdict->matchState.tagTable, 2377 - tagTableSize); 2071 + cdict->matchState.tagTable, 2072 + tagTableSize); 2073 + cctx->blockState.matchState.hashSalt = cdict->matchState.hashSalt; 2378 2074 } 2379 2075 } 2380 2076 2381 2077 /* Zero the hashTable3, since the cdict never fills it */ 2382 - { int const h3log = cctx->blockState.matchState.hashLog3; 2078 + 
assert(cctx->blockState.matchState.hashLog3 <= 31); 2079 + { U32 const h3log = cctx->blockState.matchState.hashLog3; 2383 2080 size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; 2384 2081 assert(cdict->matchState.hashLog3 == 0); 2385 2082 ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); ··· 2391 2082 ZSTD_cwksp_mark_tables_clean(&cctx->workspace); 2392 2083 2393 2084 /* copy dictionary offsets */ 2394 - { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; 2395 - ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; 2085 + { ZSTD_MatchState_t const* srcMatchState = &cdict->matchState; 2086 + ZSTD_MatchState_t* dstMatchState = &cctx->blockState.matchState; 2396 2087 dstMatchState->window = srcMatchState->window; 2397 2088 dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; 2398 2089 dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; ··· 2450 2141 /* Copy only compression parameters related to tables. */ 2451 2142 params.cParams = srcCCtx->appliedParams.cParams; 2452 2143 assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_ps_auto); 2453 - assert(srcCCtx->appliedParams.useBlockSplitter != ZSTD_ps_auto); 2144 + assert(srcCCtx->appliedParams.postBlockSplitter != ZSTD_ps_auto); 2454 2145 assert(srcCCtx->appliedParams.ldmParams.enableLdm != ZSTD_ps_auto); 2455 2146 params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder; 2456 - params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter; 2147 + params.postBlockSplitter = srcCCtx->appliedParams.postBlockSplitter; 2457 2148 params.ldmParams = srcCCtx->appliedParams.ldmParams; 2458 2149 params.fParams = fParams; 2150 + params.maxBlockSize = srcCCtx->appliedParams.maxBlockSize; 2459 2151 ZSTD_resetCCtx_internal(dstCCtx, &params, pledgedSrcSize, 2460 2152 /* loadedDictSize */ 0, 2461 2153 ZSTDcrp_leaveDirty, zbuff); ··· 2476 2166 ? 
((size_t)1 << srcCCtx->appliedParams.cParams.chainLog) 2477 2167 : 0; 2478 2168 size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; 2479 - int const h3log = srcCCtx->blockState.matchState.hashLog3; 2169 + U32 const h3log = srcCCtx->blockState.matchState.hashLog3; 2480 2170 size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; 2481 2171 2482 2172 ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable, ··· 2494 2184 2495 2185 /* copy dictionary offsets */ 2496 2186 { 2497 - const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; 2498 - ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; 2187 + const ZSTD_MatchState_t* srcMatchState = &srcCCtx->blockState.matchState; 2188 + ZSTD_MatchState_t* dstMatchState = &dstCCtx->blockState.matchState; 2499 2189 dstMatchState->window = srcMatchState->window; 2500 2190 dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; 2501 2191 dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; ··· 2544 2234 /* Protect special index values < ZSTD_WINDOW_START_INDEX. */ 2545 2235 U32 const reducerThreshold = reducerValue + ZSTD_WINDOW_START_INDEX; 2546 2236 assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ 2547 - assert(size < (1U<<31)); /* can be casted to int */ 2237 + assert(size < (1U<<31)); /* can be cast to int */ 2548 2238 2549 2239 2550 2240 for (rowNb=0 ; rowNb < nbRows ; rowNb++) { ··· 2577 2267 2578 2268 /*! 
ZSTD_reduceIndex() : 2579 2269 * rescale all indexes to avoid future overflow (indexes are U32) */ 2580 - static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue) 2270 + static void ZSTD_reduceIndex (ZSTD_MatchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue) 2581 2271 { 2582 2272 { U32 const hSize = (U32)1 << params->cParams.hashLog; 2583 2273 ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); ··· 2604 2294 2605 2295 /* See doc/zstd_compression_format.md for detailed format description */ 2606 2296 2607 - void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) 2297 + int ZSTD_seqToCodes(const SeqStore_t* seqStorePtr) 2608 2298 { 2609 - const seqDef* const sequences = seqStorePtr->sequencesStart; 2299 + const SeqDef* const sequences = seqStorePtr->sequencesStart; 2610 2300 BYTE* const llCodeTable = seqStorePtr->llCode; 2611 2301 BYTE* const ofCodeTable = seqStorePtr->ofCode; 2612 2302 BYTE* const mlCodeTable = seqStorePtr->mlCode; 2613 2303 U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); 2614 2304 U32 u; 2305 + int longOffsets = 0; 2615 2306 assert(nbSeq <= seqStorePtr->maxNbSeq); 2616 2307 for (u=0; u<nbSeq; u++) { 2617 2308 U32 const llv = sequences[u].litLength; 2309 + U32 const ofCode = ZSTD_highbit32(sequences[u].offBase); 2618 2310 U32 const mlv = sequences[u].mlBase; 2619 2311 llCodeTable[u] = (BYTE)ZSTD_LLcode(llv); 2620 - ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offBase); 2312 + ofCodeTable[u] = (BYTE)ofCode; 2621 2313 mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv); 2314 + assert(!(MEM_64bits() && ofCode >= STREAM_ACCUMULATOR_MIN)); 2315 + if (MEM_32bits() && ofCode >= STREAM_ACCUMULATOR_MIN) 2316 + longOffsets = 1; 2622 2317 } 2623 2318 if (seqStorePtr->longLengthType==ZSTD_llt_literalLength) 2624 2319 llCodeTable[seqStorePtr->longLengthPos] = MaxLL; 2625 2320 if (seqStorePtr->longLengthType==ZSTD_llt_matchLength) 2626 2321 
mlCodeTable[seqStorePtr->longLengthPos] = MaxML; 2322 + return longOffsets; 2627 2323 } 2628 2324 2629 2325 /* ZSTD_useTargetCBlockSize(): ··· 2649 2333 * Returns 1 if true, 0 otherwise. */ 2650 2334 static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams) 2651 2335 { 2652 - DEBUGLOG(5, "ZSTD_blockSplitterEnabled (useBlockSplitter=%d)", cctxParams->useBlockSplitter); 2653 - assert(cctxParams->useBlockSplitter != ZSTD_ps_auto); 2654 - return (cctxParams->useBlockSplitter == ZSTD_ps_enable); 2336 + DEBUGLOG(5, "ZSTD_blockSplitterEnabled (postBlockSplitter=%d)", cctxParams->postBlockSplitter); 2337 + assert(cctxParams->postBlockSplitter != ZSTD_ps_auto); 2338 + return (cctxParams->postBlockSplitter == ZSTD_ps_enable); 2655 2339 } 2656 2340 2657 2341 /* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types ··· 2663 2347 U32 MLtype; 2664 2348 size_t size; 2665 2349 size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */ 2350 + int longOffsets; 2666 2351 } ZSTD_symbolEncodingTypeStats_t; 2667 2352 2668 2353 /* ZSTD_buildSequencesStatistics(): ··· 2674 2357 * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32) 2675 2358 */ 2676 2359 static ZSTD_symbolEncodingTypeStats_t 2677 - ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, 2678 - const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, 2679 - BYTE* dst, const BYTE* const dstEnd, 2680 - ZSTD_strategy strategy, unsigned* countWorkspace, 2681 - void* entropyWorkspace, size_t entropyWkspSize) { 2360 + ZSTD_buildSequencesStatistics( 2361 + const SeqStore_t* seqStorePtr, size_t nbSeq, 2362 + const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, 2363 + BYTE* dst, const BYTE* const dstEnd, 2364 + ZSTD_strategy strategy, unsigned* countWorkspace, 2365 + void* entropyWorkspace, size_t entropyWkspSize) 2366 + { 2682 2367 BYTE* const ostart = dst; 
2683 2368 const BYTE* const oend = dstEnd; 2684 2369 BYTE* op = ostart; ··· 2694 2375 2695 2376 stats.lastCountSize = 0; 2696 2377 /* convert length/distances into codes */ 2697 - ZSTD_seqToCodes(seqStorePtr); 2378 + stats.longOffsets = ZSTD_seqToCodes(seqStorePtr); 2698 2379 assert(op <= oend); 2699 2380 assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */ 2700 2381 /* build CTable for Literal Lengths */ ··· 2711 2392 assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ 2712 2393 { size_t const countSize = ZSTD_buildCTable( 2713 2394 op, (size_t)(oend - op), 2714 - CTable_LitLength, LLFSELog, (symbolEncodingType_e)stats.LLtype, 2395 + CTable_LitLength, LLFSELog, (SymbolEncodingType_e)stats.LLtype, 2715 2396 countWorkspace, max, llCodeTable, nbSeq, 2716 2397 LL_defaultNorm, LL_defaultNormLog, MaxLL, 2717 2398 prevEntropy->litlengthCTable, ··· 2732 2413 size_t const mostFrequent = HIST_countFast_wksp( 2733 2414 countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ 2734 2415 /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ 2735 - ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; 2416 + ZSTD_DefaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; 2736 2417 DEBUGLOG(5, "Building OF table"); 2737 2418 nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; 2738 2419 stats.Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, ··· 2743 2424 assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ 2744 2425 { size_t const countSize = ZSTD_buildCTable( 2745 2426 op, (size_t)(oend - op), 2746 - CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)stats.Offtype, 2427 + CTable_OffsetBits, OffFSELog, (SymbolEncodingType_e)stats.Offtype, 2747 2428 countWorkspace, max, ofCodeTable, nbSeq, 2748 2429 OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, 2749 2430 prevEntropy->offcodeCTable, ··· 2773 2454 assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ 2774 2455 { size_t const countSize = ZSTD_buildCTable( 2775 2456 op, (size_t)(oend - op), 2776 - CTable_MatchLength, MLFSELog, (symbolEncodingType_e)stats.MLtype, 2457 + CTable_MatchLength, MLFSELog, (SymbolEncodingType_e)stats.MLtype, 2777 2458 countWorkspace, max, mlCodeTable, nbSeq, 2778 2459 ML_defaultNorm, ML_defaultNormLog, MaxML, 2779 2460 prevEntropy->matchlengthCTable, ··· 2799 2480 */ 2800 2481 #define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20 2801 2482 MEM_STATIC size_t 2802 - ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr, 2803 - const ZSTD_entropyCTables_t* prevEntropy, 2804 - ZSTD_entropyCTables_t* nextEntropy, 2805 - const ZSTD_CCtx_params* cctxParams, 2806 - void* dst, size_t dstCapacity, 2807 - void* entropyWorkspace, size_t entropyWkspSize, 2808 - const int bmi2) 2483 + ZSTD_entropyCompressSeqStore_internal( 2484 + void* dst, size_t dstCapacity, 2485 + const void* literals, size_t litSize, 2486 + const SeqStore_t* seqStorePtr, 2487 + const ZSTD_entropyCTables_t* prevEntropy, 2488 + ZSTD_entropyCTables_t* nextEntropy, 2489 + const 
ZSTD_CCtx_params* cctxParams, 2490 + void* entropyWorkspace, size_t entropyWkspSize, 2491 + const int bmi2) 2809 2492 { 2810 - const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; 2811 2493 ZSTD_strategy const strategy = cctxParams->cParams.strategy; 2812 2494 unsigned* count = (unsigned*)entropyWorkspace; 2813 2495 FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; 2814 2496 FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; 2815 2497 FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; 2816 - const seqDef* const sequences = seqStorePtr->sequencesStart; 2817 - const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; 2498 + const SeqDef* const sequences = seqStorePtr->sequencesStart; 2499 + const size_t nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); 2818 2500 const BYTE* const ofCodeTable = seqStorePtr->ofCode; 2819 2501 const BYTE* const llCodeTable = seqStorePtr->llCode; 2820 2502 const BYTE* const mlCodeTable = seqStorePtr->mlCode; ··· 2823 2503 BYTE* const oend = ostart + dstCapacity; 2824 2504 BYTE* op = ostart; 2825 2505 size_t lastCountSize; 2506 + int longOffsets = 0; 2826 2507 2827 2508 entropyWorkspace = count + (MaxSeq + 1); 2828 2509 entropyWkspSize -= (MaxSeq + 1) * sizeof(*count); 2829 2510 2830 - DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq); 2511 + DEBUGLOG(5, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu, dstCapacity=%zu)", nbSeq, dstCapacity); 2831 2512 ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); 2832 2513 assert(entropyWkspSize >= HUF_WORKSPACE_SIZE); 2833 2514 2834 2515 /* Compress literals */ 2835 - { const BYTE* const literals = seqStorePtr->litStart; 2836 - size_t const numSequences = seqStorePtr->sequences - seqStorePtr->sequencesStart; 2837 - size_t const numLiterals = seqStorePtr->lit - seqStorePtr->litStart; 2516 + { size_t const numSequences = 
(size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); 2838 2517 /* Base suspicion of uncompressibility on ratio of literals to sequences */ 2839 - unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO); 2840 - size_t const litSize = (size_t)(seqStorePtr->lit - literals); 2518 + int const suspectUncompressible = (numSequences == 0) || (litSize / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO); 2519 + 2841 2520 size_t const cSize = ZSTD_compressLiterals( 2842 - &prevEntropy->huf, &nextEntropy->huf, 2843 - cctxParams->cParams.strategy, 2844 - ZSTD_literalsCompressionIsDisabled(cctxParams), 2845 2521 op, dstCapacity, 2846 2522 literals, litSize, 2847 2523 entropyWorkspace, entropyWkspSize, 2848 - bmi2, suspectUncompressible); 2524 + &prevEntropy->huf, &nextEntropy->huf, 2525 + cctxParams->cParams.strategy, 2526 + ZSTD_literalsCompressionIsDisabled(cctxParams), 2527 + suspectUncompressible, bmi2); 2849 2528 FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); 2850 2529 assert(cSize <= dstCapacity); 2851 2530 op += cSize; ··· 2870 2551 ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); 2871 2552 return (size_t)(op - ostart); 2872 2553 } 2873 - { 2874 - ZSTD_symbolEncodingTypeStats_t stats; 2875 - BYTE* seqHead = op++; 2554 + { BYTE* const seqHead = op++; 2876 2555 /* build stats for sequences */ 2877 - stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, 2556 + const ZSTD_symbolEncodingTypeStats_t stats = 2557 + ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, 2878 2558 &prevEntropy->fse, &nextEntropy->fse, 2879 2559 op, oend, 2880 2560 strategy, count, ··· 2882 2564 *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2)); 2883 2565 lastCountSize = stats.lastCountSize; 2884 2566 op += stats.size; 2567 + longOffsets = stats.longOffsets; 2885 2568 } 2886 2569 2887 2570 { size_t const bitstreamSize = ZSTD_encodeSequences( ··· 
2916 2597 return (size_t)(op - ostart); 2917 2598 } 2918 2599 2919 - MEM_STATIC size_t 2920 - ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr, 2921 - const ZSTD_entropyCTables_t* prevEntropy, 2922 - ZSTD_entropyCTables_t* nextEntropy, 2923 - const ZSTD_CCtx_params* cctxParams, 2924 - void* dst, size_t dstCapacity, 2925 - size_t srcSize, 2926 - void* entropyWorkspace, size_t entropyWkspSize, 2927 - int bmi2) 2600 + static size_t 2601 + ZSTD_entropyCompressSeqStore_wExtLitBuffer( 2602 + void* dst, size_t dstCapacity, 2603 + const void* literals, size_t litSize, 2604 + size_t blockSize, 2605 + const SeqStore_t* seqStorePtr, 2606 + const ZSTD_entropyCTables_t* prevEntropy, 2607 + ZSTD_entropyCTables_t* nextEntropy, 2608 + const ZSTD_CCtx_params* cctxParams, 2609 + void* entropyWorkspace, size_t entropyWkspSize, 2610 + int bmi2) 2928 2611 { 2929 2612 size_t const cSize = ZSTD_entropyCompressSeqStore_internal( 2930 - seqStorePtr, prevEntropy, nextEntropy, cctxParams, 2931 2613 dst, dstCapacity, 2614 + literals, litSize, 2615 + seqStorePtr, prevEntropy, nextEntropy, cctxParams, 2932 2616 entropyWorkspace, entropyWkspSize, bmi2); 2933 2617 if (cSize == 0) return 0; 2934 2618 /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. 2935 2619 * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. 
2936 2620 */ 2937 - if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) 2621 + if ((cSize == ERROR(dstSize_tooSmall)) & (blockSize <= dstCapacity)) { 2622 + DEBUGLOG(4, "not enough dstCapacity (%zu) for ZSTD_entropyCompressSeqStore_internal()=> do not compress block", dstCapacity); 2938 2623 return 0; /* block not compressed */ 2624 + } 2939 2625 FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed"); 2940 2626 2941 2627 /* Check compressibility */ 2942 - { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); 2628 + { size_t const maxCSize = blockSize - ZSTD_minGain(blockSize, cctxParams->cParams.strategy); 2943 2629 if (cSize >= maxCSize) return 0; /* block not compressed */ 2944 2630 } 2945 - DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize); 2631 + DEBUGLOG(5, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize); 2632 + /* libzstd decoder before > v1.5.4 is not compatible with compressed blocks of size ZSTD_BLOCKSIZE_MAX exactly. 2633 + * This restriction is indirectly already fulfilled by respecting ZSTD_minGain() condition above. 
2634 + */ 2635 + assert(cSize < ZSTD_BLOCKSIZE_MAX); 2946 2636 return cSize; 2637 + } 2638 + 2639 + static size_t 2640 + ZSTD_entropyCompressSeqStore( 2641 + const SeqStore_t* seqStorePtr, 2642 + const ZSTD_entropyCTables_t* prevEntropy, 2643 + ZSTD_entropyCTables_t* nextEntropy, 2644 + const ZSTD_CCtx_params* cctxParams, 2645 + void* dst, size_t dstCapacity, 2646 + size_t srcSize, 2647 + void* entropyWorkspace, size_t entropyWkspSize, 2648 + int bmi2) 2649 + { 2650 + return ZSTD_entropyCompressSeqStore_wExtLitBuffer( 2651 + dst, dstCapacity, 2652 + seqStorePtr->litStart, (size_t)(seqStorePtr->lit - seqStorePtr->litStart), 2653 + srcSize, 2654 + seqStorePtr, 2655 + prevEntropy, nextEntropy, 2656 + cctxParams, 2657 + entropyWorkspace, entropyWkspSize, 2658 + bmi2); 2947 2659 } 2948 2660 2949 2661 /* ZSTD_selectBlockCompressor() : 2950 2662 * Not static, but internal use only (used by long distance matcher) 2951 2663 * assumption : strat is a valid strategy */ 2952 - ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e useRowMatchFinder, ZSTD_dictMode_e dictMode) 2664 + ZSTD_BlockCompressor_f ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_ParamSwitch_e useRowMatchFinder, ZSTD_dictMode_e dictMode) 2953 2665 { 2954 - static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { 2666 + static const ZSTD_BlockCompressor_f blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { 2955 2667 { ZSTD_compressBlock_fast /* default for 0 */, 2956 2668 ZSTD_compressBlock_fast, 2957 - ZSTD_compressBlock_doubleFast, 2958 - ZSTD_compressBlock_greedy, 2959 - ZSTD_compressBlock_lazy, 2960 - ZSTD_compressBlock_lazy2, 2961 - ZSTD_compressBlock_btlazy2, 2962 - ZSTD_compressBlock_btopt, 2963 - ZSTD_compressBlock_btultra, 2964 - ZSTD_compressBlock_btultra2 }, 2669 + ZSTD_COMPRESSBLOCK_DOUBLEFAST, 2670 + ZSTD_COMPRESSBLOCK_GREEDY, 2671 + ZSTD_COMPRESSBLOCK_LAZY, 2672 + ZSTD_COMPRESSBLOCK_LAZY2, 2673 + ZSTD_COMPRESSBLOCK_BTLAZY2, 2674 + 
ZSTD_COMPRESSBLOCK_BTOPT, 2675 + ZSTD_COMPRESSBLOCK_BTULTRA, 2676 + ZSTD_COMPRESSBLOCK_BTULTRA2 2677 + }, 2965 2678 { ZSTD_compressBlock_fast_extDict /* default for 0 */, 2966 2679 ZSTD_compressBlock_fast_extDict, 2967 - ZSTD_compressBlock_doubleFast_extDict, 2968 - ZSTD_compressBlock_greedy_extDict, 2969 - ZSTD_compressBlock_lazy_extDict, 2970 - ZSTD_compressBlock_lazy2_extDict, 2971 - ZSTD_compressBlock_btlazy2_extDict, 2972 - ZSTD_compressBlock_btopt_extDict, 2973 - ZSTD_compressBlock_btultra_extDict, 2974 - ZSTD_compressBlock_btultra_extDict }, 2680 + ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT, 2681 + ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT, 2682 + ZSTD_COMPRESSBLOCK_LAZY_EXTDICT, 2683 + ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT, 2684 + ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT, 2685 + ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT, 2686 + ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT, 2687 + ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT 2688 + }, 2975 2689 { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, 2976 2690 ZSTD_compressBlock_fast_dictMatchState, 2977 - ZSTD_compressBlock_doubleFast_dictMatchState, 2978 - ZSTD_compressBlock_greedy_dictMatchState, 2979 - ZSTD_compressBlock_lazy_dictMatchState, 2980 - ZSTD_compressBlock_lazy2_dictMatchState, 2981 - ZSTD_compressBlock_btlazy2_dictMatchState, 2982 - ZSTD_compressBlock_btopt_dictMatchState, 2983 - ZSTD_compressBlock_btultra_dictMatchState, 2984 - ZSTD_compressBlock_btultra_dictMatchState }, 2691 + ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE, 2692 + ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE, 2693 + ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE, 2694 + ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE, 2695 + ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE, 2696 + ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE, 2697 + ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE, 2698 + ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE 2699 + }, 2985 2700 { NULL /* default for 0 */, 2986 2701 NULL, 2987 2702 NULL, 2988 - ZSTD_compressBlock_greedy_dedicatedDictSearch, 2989 - 
ZSTD_compressBlock_lazy_dedicatedDictSearch, 2990 - ZSTD_compressBlock_lazy2_dedicatedDictSearch, 2703 + ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH, 2704 + ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH, 2705 + ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH, 2991 2706 NULL, 2992 2707 NULL, 2993 2708 NULL, 2994 2709 NULL } 2995 2710 }; 2996 - ZSTD_blockCompressor selectedCompressor; 2711 + ZSTD_BlockCompressor_f selectedCompressor; 2997 2712 ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); 2998 2713 2999 - assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); 3000 - DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder); 2714 + assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat)); 2715 + DEBUGLOG(5, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder); 3001 2716 if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) { 3002 - static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = { 3003 - { ZSTD_compressBlock_greedy_row, 3004 - ZSTD_compressBlock_lazy_row, 3005 - ZSTD_compressBlock_lazy2_row }, 3006 - { ZSTD_compressBlock_greedy_extDict_row, 3007 - ZSTD_compressBlock_lazy_extDict_row, 3008 - ZSTD_compressBlock_lazy2_extDict_row }, 3009 - { ZSTD_compressBlock_greedy_dictMatchState_row, 3010 - ZSTD_compressBlock_lazy_dictMatchState_row, 3011 - ZSTD_compressBlock_lazy2_dictMatchState_row }, 3012 - { ZSTD_compressBlock_greedy_dedicatedDictSearch_row, 3013 - ZSTD_compressBlock_lazy_dedicatedDictSearch_row, 3014 - ZSTD_compressBlock_lazy2_dedicatedDictSearch_row } 2717 + static const ZSTD_BlockCompressor_f rowBasedBlockCompressors[4][3] = { 2718 + { 2719 + ZSTD_COMPRESSBLOCK_GREEDY_ROW, 2720 + ZSTD_COMPRESSBLOCK_LAZY_ROW, 2721 + ZSTD_COMPRESSBLOCK_LAZY2_ROW 2722 + }, 2723 + { 2724 + ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW, 2725 + ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW, 2726 + ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW 2727 
+ }, 2728 + { 2729 + ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW, 2730 + ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW, 2731 + ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW 2732 + }, 2733 + { 2734 + ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW, 2735 + ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW, 2736 + ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW 2737 + } 3015 2738 }; 3016 - DEBUGLOG(4, "Selecting a row-based matchfinder"); 2739 + DEBUGLOG(5, "Selecting a row-based matchfinder"); 3017 2740 assert(useRowMatchFinder != ZSTD_ps_auto); 3018 2741 selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy]; 3019 2742 } else { ··· 3065 2704 return selectedCompressor; 3066 2705 } 3067 2706 3068 - static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, 2707 + static void ZSTD_storeLastLiterals(SeqStore_t* seqStorePtr, 3069 2708 const BYTE* anchor, size_t lastLLSize) 3070 2709 { 3071 2710 ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize); 3072 2711 seqStorePtr->lit += lastLLSize; 3073 2712 } 3074 2713 3075 - void ZSTD_resetSeqStore(seqStore_t* ssPtr) 2714 + void ZSTD_resetSeqStore(SeqStore_t* ssPtr) 3076 2715 { 3077 2716 ssPtr->lit = ssPtr->litStart; 3078 2717 ssPtr->sequences = ssPtr->sequencesStart; 3079 2718 ssPtr->longLengthType = ZSTD_llt_none; 3080 2719 } 3081 2720 3082 - typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; 2721 + /* ZSTD_postProcessSequenceProducerResult() : 2722 + * Validates and post-processes sequences obtained through the external matchfinder API: 2723 + * - Checks whether nbExternalSeqs represents an error condition. 2724 + * - Appends a block delimiter to outSeqs if one is not already present. 2725 + * See zstd.h for context regarding block delimiters. 2726 + * Returns the number of sequences after post-processing, or an error code. 
*/ 2727 + static size_t ZSTD_postProcessSequenceProducerResult( 2728 + ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, size_t srcSize 2729 + ) { 2730 + RETURN_ERROR_IF( 2731 + nbExternalSeqs > outSeqsCapacity, 2732 + sequenceProducer_failed, 2733 + "External sequence producer returned error code %lu", 2734 + (unsigned long)nbExternalSeqs 2735 + ); 2736 + 2737 + RETURN_ERROR_IF( 2738 + nbExternalSeqs == 0 && srcSize > 0, 2739 + sequenceProducer_failed, 2740 + "Got zero sequences from external sequence producer for a non-empty src buffer!" 2741 + ); 2742 + 2743 + if (srcSize == 0) { 2744 + ZSTD_memset(&outSeqs[0], 0, sizeof(ZSTD_Sequence)); 2745 + return 1; 2746 + } 2747 + 2748 + { 2749 + ZSTD_Sequence const lastSeq = outSeqs[nbExternalSeqs - 1]; 2750 + 2751 + /* We can return early if lastSeq is already a block delimiter. */ 2752 + if (lastSeq.offset == 0 && lastSeq.matchLength == 0) { 2753 + return nbExternalSeqs; 2754 + } 2755 + 2756 + /* This error condition is only possible if the external matchfinder 2757 + * produced an invalid parse, by definition of ZSTD_sequenceBound(). */ 2758 + RETURN_ERROR_IF( 2759 + nbExternalSeqs == outSeqsCapacity, 2760 + sequenceProducer_failed, 2761 + "nbExternalSeqs == outSeqsCapacity but lastSeq is not a block delimiter!" 2762 + ); 2763 + 2764 + /* lastSeq is not a block delimiter, so we need to append one. */ 2765 + ZSTD_memset(&outSeqs[nbExternalSeqs], 0, sizeof(ZSTD_Sequence)); 2766 + return nbExternalSeqs + 1; 2767 + } 2768 + } 2769 + 2770 + /* ZSTD_fastSequenceLengthSum() : 2771 + * Returns sum(litLen) + sum(matchLen) + lastLits for *seqBuf*. 2772 + * Similar to another function in zstd_compress.c (determine_blockSize), 2773 + * except it doesn't check for a block delimiter to end summation. 2774 + * Removing the early exit allows the compiler to auto-vectorize (https://godbolt.org/z/cY1cajz9P). 2775 + * This function can be deleted and replaced by determine_blockSize after we resolve issue #3456. 
*/ 2776 + static size_t ZSTD_fastSequenceLengthSum(ZSTD_Sequence const* seqBuf, size_t seqBufSize) { 2777 + size_t matchLenSum, litLenSum, i; 2778 + matchLenSum = 0; 2779 + litLenSum = 0; 2780 + for (i = 0; i < seqBufSize; i++) { 2781 + litLenSum += seqBuf[i].litLength; 2782 + matchLenSum += seqBuf[i].matchLength; 2783 + } 2784 + return litLenSum + matchLenSum; 2785 + } 2786 + 2787 + /* 2788 + * Function to validate sequences produced by a block compressor. 2789 + */ 2790 + static void ZSTD_validateSeqStore(const SeqStore_t* seqStore, const ZSTD_compressionParameters* cParams) 2791 + { 2792 + #if DEBUGLEVEL >= 1 2793 + const SeqDef* seq = seqStore->sequencesStart; 2794 + const SeqDef* const seqEnd = seqStore->sequences; 2795 + size_t const matchLenLowerBound = cParams->minMatch == 3 ? 3 : 4; 2796 + for (; seq < seqEnd; ++seq) { 2797 + const ZSTD_SequenceLength seqLength = ZSTD_getSequenceLength(seqStore, seq); 2798 + assert(seqLength.matchLength >= matchLenLowerBound); 2799 + (void)seqLength; 2800 + (void)matchLenLowerBound; 2801 + } 2802 + #else 2803 + (void)seqStore; 2804 + (void)cParams; 2805 + #endif 2806 + } 2807 + 2808 + static size_t 2809 + ZSTD_transferSequences_wBlockDelim(ZSTD_CCtx* cctx, 2810 + ZSTD_SequencePosition* seqPos, 2811 + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, 2812 + const void* src, size_t blockSize, 2813 + ZSTD_ParamSwitch_e externalRepSearch); 2814 + 2815 + typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_BuildSeqStore_e; 3083 2816 3084 2817 static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) 3085 2818 { 3086 - ZSTD_matchState_t* const ms = &zc->blockState.matchState; 2819 + ZSTD_MatchState_t* const ms = &zc->blockState.matchState; 3087 2820 DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize); 3088 2821 assert(srcSize <= ZSTD_BLOCKSIZE_MAX); 3089 2822 /* Assert that we have correctly flushed the ctx params into the ms's copy */ 3090 2823 
ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); 3091 - if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { 2824 + /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding 2825 + * additional 1. We need to revisit and change this logic to be more consistent */ 2826 + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { 3092 2827 if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) { 3093 2828 ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize); 3094 2829 } else { ··· 3220 2763 } 3221 2764 if (zc->externSeqStore.pos < zc->externSeqStore.size) { 3222 2765 assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable); 2766 + 2767 + /* External matchfinder + LDM is technically possible, just not implemented yet. 2768 + * We need to revisit soon and implement it. */ 2769 + RETURN_ERROR_IF( 2770 + ZSTD_hasExtSeqProd(&zc->appliedParams), 2771 + parameter_combination_unsupported, 2772 + "Long-distance matching with external sequence producer enabled is not currently supported." 2773 + ); 2774 + 3223 2775 /* Updates ldmSeqStore.pos */ 3224 2776 lastLLSize = 3225 2777 ZSTD_ldm_blockCompress(&zc->externSeqStore, ··· 3238 2772 src, srcSize); 3239 2773 assert(zc->externSeqStore.pos <= zc->externSeqStore.size); 3240 2774 } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) { 3241 - rawSeqStore_t ldmSeqStore = kNullRawSeqStore; 2775 + RawSeqStore_t ldmSeqStore = kNullRawSeqStore; 2776 + 2777 + /* External matchfinder + LDM is technically possible, just not implemented yet. 2778 + * We need to revisit soon and implement it. */ 2779 + RETURN_ERROR_IF( 2780 + ZSTD_hasExtSeqProd(&zc->appliedParams), 2781 + parameter_combination_unsupported, 2782 + "Long-distance matching with external sequence producer enabled is not currently supported." 
2783 + ); 3242 2784 3243 2785 ldmSeqStore.seq = zc->ldmSequences; 3244 2786 ldmSeqStore.capacity = zc->maxNbLdmSequences; ··· 3262 2788 zc->appliedParams.useRowMatchFinder, 3263 2789 src, srcSize); 3264 2790 assert(ldmSeqStore.pos == ldmSeqStore.size); 3265 - } else { /* not long range mode */ 3266 - ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, 3267 - zc->appliedParams.useRowMatchFinder, 3268 - dictMode); 2791 + } else if (ZSTD_hasExtSeqProd(&zc->appliedParams)) { 2792 + assert( 2793 + zc->extSeqBufCapacity >= ZSTD_sequenceBound(srcSize) 2794 + ); 2795 + assert(zc->appliedParams.extSeqProdFunc != NULL); 2796 + 2797 + { U32 const windowSize = (U32)1 << zc->appliedParams.cParams.windowLog; 2798 + 2799 + size_t const nbExternalSeqs = (zc->appliedParams.extSeqProdFunc)( 2800 + zc->appliedParams.extSeqProdState, 2801 + zc->extSeqBuf, 2802 + zc->extSeqBufCapacity, 2803 + src, srcSize, 2804 + NULL, 0, /* dict and dictSize, currently not supported */ 2805 + zc->appliedParams.compressionLevel, 2806 + windowSize 2807 + ); 2808 + 2809 + size_t const nbPostProcessedSeqs = ZSTD_postProcessSequenceProducerResult( 2810 + zc->extSeqBuf, 2811 + nbExternalSeqs, 2812 + zc->extSeqBufCapacity, 2813 + srcSize 2814 + ); 2815 + 2816 + /* Return early if there is no error, since we don't need to worry about last literals */ 2817 + if (!ZSTD_isError(nbPostProcessedSeqs)) { 2818 + ZSTD_SequencePosition seqPos = {0,0,0}; 2819 + size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->extSeqBuf, nbPostProcessedSeqs); 2820 + RETURN_ERROR_IF(seqLenSum > srcSize, externalSequences_invalid, "External sequences imply too large a block!"); 2821 + FORWARD_IF_ERROR( 2822 + ZSTD_transferSequences_wBlockDelim( 2823 + zc, &seqPos, 2824 + zc->extSeqBuf, nbPostProcessedSeqs, 2825 + src, srcSize, 2826 + zc->appliedParams.searchForExternalRepcodes 2827 + ), 2828 + "Failed to copy external sequences to seqStore!" 
2829 + ); 2830 + ms->ldmSeqStore = NULL; 2831 + DEBUGLOG(5, "Copied %lu sequences from external sequence producer to internal seqStore.", (unsigned long)nbExternalSeqs); 2832 + return ZSTDbss_compress; 2833 + } 2834 + 2835 + /* Propagate the error if fallback is disabled */ 2836 + if (!zc->appliedParams.enableMatchFinderFallback) { 2837 + return nbPostProcessedSeqs; 2838 + } 2839 + 2840 + /* Fallback to software matchfinder */ 2841 + { ZSTD_BlockCompressor_f const blockCompressor = 2842 + ZSTD_selectBlockCompressor( 2843 + zc->appliedParams.cParams.strategy, 2844 + zc->appliedParams.useRowMatchFinder, 2845 + dictMode); 2846 + ms->ldmSeqStore = NULL; 2847 + DEBUGLOG( 2848 + 5, 2849 + "External sequence producer returned error code %lu. Falling back to internal parser.", 2850 + (unsigned long)nbExternalSeqs 2851 + ); 2852 + lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); 2853 + } } 2854 + } else { /* not long range mode and no external matchfinder */ 2855 + ZSTD_BlockCompressor_f const blockCompressor = ZSTD_selectBlockCompressor( 2856 + zc->appliedParams.cParams.strategy, 2857 + zc->appliedParams.useRowMatchFinder, 2858 + dictMode); 3269 2859 ms->ldmSeqStore = NULL; 3270 2860 lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); 3271 2861 } 3272 2862 { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; 3273 2863 ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); 3274 2864 } } 2865 + ZSTD_validateSeqStore(&zc->seqStore, &zc->appliedParams.cParams); 3275 2866 return ZSTDbss_compress; 3276 2867 } 3277 2868 3278 - static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) 2869 + static size_t ZSTD_copyBlockSequences(SeqCollector* seqCollector, const SeqStore_t* seqStore, const U32 prevRepcodes[ZSTD_REP_NUM]) 3279 2870 { 3280 - const seqStore_t* seqStore = ZSTD_getSeqStore(zc); 3281 - const seqDef* seqStoreSeqs = seqStore->sequencesStart; 3282 - size_t 
seqStoreSeqSize = seqStore->sequences - seqStoreSeqs; 3283 - size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart); 3284 - size_t literalsRead = 0; 3285 - size_t lastLLSize; 2871 + const SeqDef* inSeqs = seqStore->sequencesStart; 2872 + const size_t nbInSequences = (size_t)(seqStore->sequences - inSeqs); 2873 + const size_t nbInLiterals = (size_t)(seqStore->lit - seqStore->litStart); 3286 2874 3287 - ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; 2875 + ZSTD_Sequence* outSeqs = seqCollector->seqIndex == 0 ? seqCollector->seqStart : seqCollector->seqStart + seqCollector->seqIndex; 2876 + const size_t nbOutSequences = nbInSequences + 1; 2877 + size_t nbOutLiterals = 0; 2878 + Repcodes_t repcodes; 3288 2879 size_t i; 3289 - repcodes_t updatedRepcodes; 3290 2880 3291 - assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); 3292 - /* Ensure we have enough space for last literals "sequence" */ 3293 - assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1); 3294 - ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); 3295 - for (i = 0; i < seqStoreSeqSize; ++i) { 3296 - U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM; 3297 - outSeqs[i].litLength = seqStoreSeqs[i].litLength; 3298 - outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH; 2881 + /* Bounds check that we have enough space for every input sequence 2882 + * and the block delimiter 2883 + */ 2884 + assert(seqCollector->seqIndex <= seqCollector->maxSequences); 2885 + RETURN_ERROR_IF( 2886 + nbOutSequences > (size_t)(seqCollector->maxSequences - seqCollector->seqIndex), 2887 + dstSize_tooSmall, 2888 + "Not enough space to copy sequences"); 2889 + 2890 + ZSTD_memcpy(&repcodes, prevRepcodes, sizeof(repcodes)); 2891 + for (i = 0; i < nbInSequences; ++i) { 2892 + U32 rawOffset; 2893 + outSeqs[i].litLength = inSeqs[i].litLength; 2894 + outSeqs[i].matchLength = inSeqs[i].mlBase + MINMATCH; 3299 2895 
outSeqs[i].rep = 0; 3300 2896 2897 + /* Handle the possible single length >= 64K 2898 + * There can only be one because we add MINMATCH to every match length, 2899 + * and blocks are at most 128K. 2900 + */ 3301 2901 if (i == seqStore->longLengthPos) { 3302 2902 if (seqStore->longLengthType == ZSTD_llt_literalLength) { 3303 2903 outSeqs[i].litLength += 0x10000; ··· 3380 2832 } 3381 2833 } 3382 2834 3383 - if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) { 3384 - /* Derive the correct offset corresponding to a repcode */ 3385 - outSeqs[i].rep = seqStoreSeqs[i].offBase; 2835 + /* Determine the raw offset given the offBase, which may be a repcode. */ 2836 + if (OFFBASE_IS_REPCODE(inSeqs[i].offBase)) { 2837 + const U32 repcode = OFFBASE_TO_REPCODE(inSeqs[i].offBase); 2838 + assert(repcode > 0); 2839 + outSeqs[i].rep = repcode; 3386 2840 if (outSeqs[i].litLength != 0) { 3387 - rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1]; 2841 + rawOffset = repcodes.rep[repcode - 1]; 3388 2842 } else { 3389 - if (outSeqs[i].rep == 3) { 3390 - rawOffset = updatedRepcodes.rep[0] - 1; 2843 + if (repcode == 3) { 2844 + assert(repcodes.rep[0] > 1); 2845 + rawOffset = repcodes.rep[0] - 1; 3391 2846 } else { 3392 - rawOffset = updatedRepcodes.rep[outSeqs[i].rep]; 2847 + rawOffset = repcodes.rep[repcode]; 3393 2848 } 3394 2849 } 2850 + } else { 2851 + rawOffset = OFFBASE_TO_OFFSET(inSeqs[i].offBase); 3395 2852 } 3396 2853 outSeqs[i].offset = rawOffset; 3397 - /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode 3398 - so we provide seqStoreSeqs[i].offset - 1 */ 3399 - ZSTD_updateRep(updatedRepcodes.rep, 3400 - seqStoreSeqs[i].offBase - 1, 3401 - seqStoreSeqs[i].litLength == 0); 3402 - literalsRead += outSeqs[i].litLength; 2854 + 2855 + /* Update repcode history for the sequence */ 2856 + ZSTD_updateRep(repcodes.rep, 2857 + inSeqs[i].offBase, 2858 + inSeqs[i].litLength == 0); 2859 + 2860 + nbOutLiterals += outSeqs[i].litLength; 3403 2861 } 3404 2862 /* Insert last 
literals (if any exist) in the block as a sequence with ml == off == 0. 3405 2863 * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker 3406 2864 * for the block boundary, according to the API. 3407 2865 */ 3408 - assert(seqStoreLiteralsSize >= literalsRead); 3409 - lastLLSize = seqStoreLiteralsSize - literalsRead; 3410 - outSeqs[i].litLength = (U32)lastLLSize; 3411 - outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0; 3412 - seqStoreSeqSize++; 3413 - zc->seqCollector.seqIndex += seqStoreSeqSize; 2866 + assert(nbInLiterals >= nbOutLiterals); 2867 + { 2868 + const size_t lastLLSize = nbInLiterals - nbOutLiterals; 2869 + outSeqs[nbInSequences].litLength = (U32)lastLLSize; 2870 + outSeqs[nbInSequences].matchLength = 0; 2871 + outSeqs[nbInSequences].offset = 0; 2872 + assert(nbOutSequences == nbInSequences + 1); 2873 + } 2874 + seqCollector->seqIndex += nbOutSequences; 2875 + assert(seqCollector->seqIndex <= seqCollector->maxSequences); 2876 + 2877 + return 0; 2878 + } 2879 + 2880 + size_t ZSTD_sequenceBound(size_t srcSize) { 2881 + const size_t maxNbSeq = (srcSize / ZSTD_MINMATCH_MIN) + 1; 2882 + const size_t maxNbDelims = (srcSize / ZSTD_BLOCKSIZE_MAX_MIN) + 1; 2883 + return maxNbSeq + maxNbDelims; 3414 2884 } 3415 2885 3416 2886 size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, 3417 2887 size_t outSeqsSize, const void* src, size_t srcSize) 3418 2888 { 3419 2889 const size_t dstCapacity = ZSTD_compressBound(srcSize); 3420 - void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem); 2890 + void* dst; /* Make C90 happy. 
*/ 3421 2891 SeqCollector seqCollector; 2892 + { 2893 + int targetCBlockSize; 2894 + FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_targetCBlockSize, &targetCBlockSize), ""); 2895 + RETURN_ERROR_IF(targetCBlockSize != 0, parameter_unsupported, "targetCBlockSize != 0"); 2896 + } 2897 + { 2898 + int nbWorkers; 2899 + FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_nbWorkers, &nbWorkers), ""); 2900 + RETURN_ERROR_IF(nbWorkers != 0, parameter_unsupported, "nbWorkers != 0"); 2901 + } 3422 2902 2903 + dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem); 3423 2904 RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!"); 3424 2905 3425 2906 seqCollector.collectSequences = 1; ··· 3457 2880 seqCollector.maxSequences = outSeqsSize; 3458 2881 zc->seqCollector = seqCollector; 3459 2882 3460 - ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); 3461 - ZSTD_customFree(dst, ZSTD_defaultCMem); 2883 + { 2884 + const size_t ret = ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); 2885 + ZSTD_customFree(dst, ZSTD_defaultCMem); 2886 + FORWARD_IF_ERROR(ret, "ZSTD_compress2 failed"); 2887 + } 2888 + assert(zc->seqCollector.seqIndex <= ZSTD_sequenceBound(srcSize)); 3462 2889 return zc->seqCollector.seqIndex; 3463 2890 } 3464 2891 ··· 3491 2910 const size_t unrollMask = unrollSize - 1; 3492 2911 const size_t prefixLength = length & unrollMask; 3493 2912 size_t i; 3494 - size_t u; 3495 2913 if (length == 1) return 1; 3496 2914 /* Check if prefix is RLE first before using unrolled loop */ 3497 2915 if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) { 3498 2916 return 0; 3499 2917 } 3500 2918 for (i = prefixLength; i != length; i += unrollSize) { 2919 + size_t u; 3501 2920 for (u = 0; u < unrollSize; u += sizeof(size_t)) { 3502 2921 if (MEM_readST(ip + i + u) != valueST) { 3503 2922 return 0; 3504 - } 3505 - } 3506 - } 2923 + } } } 3507 2924 return 1; 3508 2925 } 3509 2926 ··· 3509 2930 * This is just a heuristic based on the compressibility. 
3510 2931 * It may return both false positives and false negatives. 3511 2932 */ 3512 - static int ZSTD_maybeRLE(seqStore_t const* seqStore) 2933 + static int ZSTD_maybeRLE(SeqStore_t const* seqStore) 3513 2934 { 3514 2935 size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); 3515 2936 size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); ··· 3517 2938 return nbSeqs < 4 && nbLits < 10; 3518 2939 } 3519 2940 3520 - static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs) 2941 + static void 2942 + ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs) 3521 2943 { 3522 2944 ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock; 3523 2945 bs->prevCBlock = bs->nextCBlock; ··· 3526 2946 } 3527 2947 3528 2948 /* Writes the block header */ 3529 - static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) { 2949 + static void 2950 + writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) 2951 + { 3530 2952 U32 const cBlockHeader = cSize == 1 ? 3531 2953 lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : 3532 2954 lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); 3533 2955 MEM_writeLE24(op, cBlockHeader); 3534 - DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock); 2956 + DEBUGLOG(5, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock); 3535 2957 } 3536 2958 3537 2959 /* ZSTD_buildBlockEntropyStats_literals() : ··· 3541 2959 * Stores literals block type (raw, rle, compressed, repeat) and 3542 2960 * huffman description table to hufMetadata. 
3543 2961 * Requires ENTROPY_WORKSPACE_SIZE workspace 3544 - * @return : size of huffman description table or error code */ 3545 - static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, 3546 - const ZSTD_hufCTables_t* prevHuf, 3547 - ZSTD_hufCTables_t* nextHuf, 3548 - ZSTD_hufCTablesMetadata_t* hufMetadata, 3549 - const int literalsCompressionIsDisabled, 3550 - void* workspace, size_t wkspSize) 2962 + * @return : size of huffman description table, or an error code 2963 + */ 2964 + static size_t 2965 + ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, 2966 + const ZSTD_hufCTables_t* prevHuf, 2967 + ZSTD_hufCTables_t* nextHuf, 2968 + ZSTD_hufCTablesMetadata_t* hufMetadata, 2969 + const int literalsCompressionIsDisabled, 2970 + void* workspace, size_t wkspSize, 2971 + int hufFlags) 3551 2972 { 3552 2973 BYTE* const wkspStart = (BYTE*)workspace; 3553 2974 BYTE* const wkspEnd = wkspStart + wkspSize; ··· 3558 2973 unsigned* const countWksp = (unsigned*)workspace; 3559 2974 const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); 3560 2975 BYTE* const nodeWksp = countWkspStart + countWkspSize; 3561 - const size_t nodeWkspSize = wkspEnd-nodeWksp; 2976 + const size_t nodeWkspSize = (size_t)(wkspEnd - nodeWksp); 3562 2977 unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; 3563 - unsigned huffLog = HUF_TABLELOG_DEFAULT; 2978 + unsigned huffLog = LitHufLog; 3564 2979 HUF_repeat repeat = prevHuf->repeatMode; 3565 2980 DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize); 3566 2981 ··· 3575 2990 3576 2991 /* small ? don't even attempt compression (speed opt) */ 3577 2992 #ifndef COMPRESS_LITERALS_SIZE_MIN 3578 - #define COMPRESS_LITERALS_SIZE_MIN 63 2993 + # define COMPRESS_LITERALS_SIZE_MIN 63 /* heuristic */ 3579 2994 #endif 3580 2995 { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 
6 : COMPRESS_LITERALS_SIZE_MIN; 3581 2996 if (srcSize <= minLitSize) { 3582 2997 DEBUGLOG(5, "set_basic - too small"); 3583 2998 hufMetadata->hType = set_basic; 3584 2999 return 0; 3585 - } 3586 - } 3000 + } } 3587 3001 3588 3002 /* Scan input and build symbol stats */ 3589 - { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); 3003 + { size_t const largest = 3004 + HIST_count_wksp (countWksp, &maxSymbolValue, 3005 + (const BYTE*)src, srcSize, 3006 + workspace, wkspSize); 3590 3007 FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); 3591 3008 if (largest == srcSize) { 3009 + /* only one literal symbol */ 3592 3010 DEBUGLOG(5, "set_rle"); 3593 3011 hufMetadata->hType = set_rle; 3594 3012 return 0; 3595 3013 } 3596 3014 if (largest <= (srcSize >> 7)+4) { 3015 + /* heuristic: likely not compressible */ 3597 3016 DEBUGLOG(5, "set_basic - no gain"); 3598 3017 hufMetadata->hType = set_basic; 3599 3018 return 0; 3600 - } 3601 - } 3019 + } } 3602 3020 3603 3021 /* Validate the previous Huffman table */ 3604 - if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { 3022 + if (repeat == HUF_repeat_check 3023 + && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { 3605 3024 repeat = HUF_repeat_none; 3606 3025 } 3607 3026 3608 3027 /* Build Huffman Tree */ 3609 3028 ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); 3610 - huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); 3029 + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, nodeWksp, nodeWkspSize, nextHuf->CTable, countWksp, hufFlags); 3030 + assert(huffLog <= LitHufLog); 3611 3031 { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, 3612 3032 maxSymbolValue, huffLog, 3613 3033 nodeWksp, nodeWkspSize); 3614 3034 FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); 3615 3035 huffLog = (U32)maxBits; 
3616 - { /* Build and write the CTable */ 3617 - size_t const newCSize = HUF_estimateCompressedSize( 3618 - (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); 3619 - size_t const hSize = HUF_writeCTable_wksp( 3620 - hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), 3621 - (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog, 3622 - nodeWksp, nodeWkspSize); 3623 - /* Check against repeating the previous CTable */ 3624 - if (repeat != HUF_repeat_none) { 3625 - size_t const oldCSize = HUF_estimateCompressedSize( 3626 - (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); 3627 - if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { 3628 - DEBUGLOG(5, "set_repeat - smaller"); 3629 - ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); 3630 - hufMetadata->hType = set_repeat; 3631 - return 0; 3632 - } 3633 - } 3634 - if (newCSize + hSize >= srcSize) { 3635 - DEBUGLOG(5, "set_basic - no gains"); 3036 + } 3037 + { /* Build and write the CTable */ 3038 + size_t const newCSize = HUF_estimateCompressedSize( 3039 + (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); 3040 + size_t const hSize = HUF_writeCTable_wksp( 3041 + hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), 3042 + (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog, 3043 + nodeWksp, nodeWkspSize); 3044 + /* Check against repeating the previous CTable */ 3045 + if (repeat != HUF_repeat_none) { 3046 + size_t const oldCSize = HUF_estimateCompressedSize( 3047 + (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); 3048 + if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { 3049 + DEBUGLOG(5, "set_repeat - smaller"); 3636 3050 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); 3637 - hufMetadata->hType = set_basic; 3051 + hufMetadata->hType = set_repeat; 3638 3052 return 0; 3639 - } 3640 - DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); 3641 - hufMetadata->hType = set_compressed; 3642 - nextHuf->repeatMode = 
HUF_repeat_check; 3643 - return hSize; 3053 + } } 3054 + if (newCSize + hSize >= srcSize) { 3055 + DEBUGLOG(5, "set_basic - no gains"); 3056 + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); 3057 + hufMetadata->hType = set_basic; 3058 + return 0; 3644 3059 } 3060 + DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); 3061 + hufMetadata->hType = set_compressed; 3062 + nextHuf->repeatMode = HUF_repeat_check; 3063 + return hSize; 3645 3064 } 3646 3065 } 3647 3066 ··· 3655 3066 * and updates nextEntropy to the appropriate repeatMode. 3656 3067 */ 3657 3068 static ZSTD_symbolEncodingTypeStats_t 3658 - ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) { 3659 - ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0}; 3069 + ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) 3070 + { 3071 + ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0, 0}; 3660 3072 nextEntropy->litlength_repeatMode = FSE_repeat_none; 3661 3073 nextEntropy->offcode_repeatMode = FSE_repeat_none; 3662 3074 nextEntropy->matchlength_repeatMode = FSE_repeat_none; ··· 3668 3078 * Builds entropy for the sequences. 3669 3079 * Stores symbol compression modes and fse table to fseMetadata. 3670 3080 * Requires ENTROPY_WORKSPACE_SIZE wksp. 
3671 - * @return : size of fse tables or error code */ 3672 - static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr, 3673 - const ZSTD_fseCTables_t* prevEntropy, 3674 - ZSTD_fseCTables_t* nextEntropy, 3675 - const ZSTD_CCtx_params* cctxParams, 3676 - ZSTD_fseCTablesMetadata_t* fseMetadata, 3677 - void* workspace, size_t wkspSize) 3081 + * @return : size of fse tables or error code */ 3082 + static size_t 3083 + ZSTD_buildBlockEntropyStats_sequences( 3084 + const SeqStore_t* seqStorePtr, 3085 + const ZSTD_fseCTables_t* prevEntropy, 3086 + ZSTD_fseCTables_t* nextEntropy, 3087 + const ZSTD_CCtx_params* cctxParams, 3088 + ZSTD_fseCTablesMetadata_t* fseMetadata, 3089 + void* workspace, size_t wkspSize) 3678 3090 { 3679 3091 ZSTD_strategy const strategy = cctxParams->cParams.strategy; 3680 - size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; 3092 + size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); 3681 3093 BYTE* const ostart = fseMetadata->fseTablesBuffer; 3682 3094 BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); 3683 3095 BYTE* op = ostart; ··· 3695 3103 entropyWorkspace, entropyWorkspaceSize) 3696 3104 : ZSTD_buildDummySequencesStatistics(nextEntropy); 3697 3105 FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!"); 3698 - fseMetadata->llType = (symbolEncodingType_e) stats.LLtype; 3699 - fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype; 3700 - fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype; 3106 + fseMetadata->llType = (SymbolEncodingType_e) stats.LLtype; 3107 + fseMetadata->ofType = (SymbolEncodingType_e) stats.Offtype; 3108 + fseMetadata->mlType = (SymbolEncodingType_e) stats.MLtype; 3701 3109 fseMetadata->lastCountSize = stats.lastCountSize; 3702 3110 return stats.size; 3703 3111 } ··· 3706 3114 /* ZSTD_buildBlockEntropyStats() : 3707 3115 * Builds entropy for the block. 
3708 3116 * Requires workspace size ENTROPY_WORKSPACE_SIZE 3709 - * 3710 - * @return : 0 on success or error code 3117 + * @return : 0 on success, or an error code 3118 + * Note : also employed in superblock 3711 3119 */ 3712 - size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, 3713 - const ZSTD_entropyCTables_t* prevEntropy, 3714 - ZSTD_entropyCTables_t* nextEntropy, 3715 - const ZSTD_CCtx_params* cctxParams, 3716 - ZSTD_entropyCTablesMetadata_t* entropyMetadata, 3717 - void* workspace, size_t wkspSize) 3120 + size_t ZSTD_buildBlockEntropyStats( 3121 + const SeqStore_t* seqStorePtr, 3122 + const ZSTD_entropyCTables_t* prevEntropy, 3123 + ZSTD_entropyCTables_t* nextEntropy, 3124 + const ZSTD_CCtx_params* cctxParams, 3125 + ZSTD_entropyCTablesMetadata_t* entropyMetadata, 3126 + void* workspace, size_t wkspSize) 3718 3127 { 3719 - size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; 3128 + size_t const litSize = (size_t)(seqStorePtr->lit - seqStorePtr->litStart); 3129 + int const huf_useOptDepth = (cctxParams->cParams.strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD); 3130 + int const hufFlags = huf_useOptDepth ? 
HUF_flags_optimalDepth : 0; 3131 + 3720 3132 entropyMetadata->hufMetadata.hufDesSize = 3721 3133 ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize, 3722 3134 &prevEntropy->huf, &nextEntropy->huf, 3723 3135 &entropyMetadata->hufMetadata, 3724 3136 ZSTD_literalsCompressionIsDisabled(cctxParams), 3725 - workspace, wkspSize); 3137 + workspace, wkspSize, hufFlags); 3138 + 3726 3139 FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed"); 3727 3140 entropyMetadata->fseMetadata.fseTablesSize = 3728 3141 ZSTD_buildBlockEntropyStats_sequences(seqStorePtr, ··· 3740 3143 } 3741 3144 3742 3145 /* Returns the size estimate for the literals section (header + content) of a block */ 3743 - static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize, 3744 - const ZSTD_hufCTables_t* huf, 3745 - const ZSTD_hufCTablesMetadata_t* hufMetadata, 3746 - void* workspace, size_t wkspSize, 3747 - int writeEntropy) 3146 + static size_t 3147 + ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize, 3148 + const ZSTD_hufCTables_t* huf, 3149 + const ZSTD_hufCTablesMetadata_t* hufMetadata, 3150 + void* workspace, size_t wkspSize, 3151 + int writeEntropy) 3748 3152 { 3749 3153 unsigned* const countWksp = (unsigned*)workspace; 3750 3154 unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; ··· 3767 3169 } 3768 3170 3769 3171 /* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */ 3770 - static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type, 3771 - const BYTE* codeTable, size_t nbSeq, unsigned maxCode, 3772 - const FSE_CTable* fseCTable, 3773 - const U8* additionalBits, 3774 - short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, 3775 - void* workspace, size_t wkspSize) 3172 + static size_t 3173 + ZSTD_estimateBlockSize_symbolType(SymbolEncodingType_e type, 3174 + const BYTE* codeTable, size_t nbSeq, unsigned maxCode, 3175 + const FSE_CTable* 
fseCTable, 3176 + const U8* additionalBits, 3177 + short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, 3178 + void* workspace, size_t wkspSize) 3776 3179 { 3777 3180 unsigned* const countWksp = (unsigned*)workspace; 3778 3181 const BYTE* ctp = codeTable; ··· 3805 3206 } 3806 3207 3807 3208 /* Returns the size estimate for the sequences section (header + content) of a block */ 3808 - static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable, 3809 - const BYTE* llCodeTable, 3810 - const BYTE* mlCodeTable, 3811 - size_t nbSeq, 3812 - const ZSTD_fseCTables_t* fseTables, 3813 - const ZSTD_fseCTablesMetadata_t* fseMetadata, 3814 - void* workspace, size_t wkspSize, 3815 - int writeEntropy) 3209 + static size_t 3210 + ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable, 3211 + const BYTE* llCodeTable, 3212 + const BYTE* mlCodeTable, 3213 + size_t nbSeq, 3214 + const ZSTD_fseCTables_t* fseTables, 3215 + const ZSTD_fseCTablesMetadata_t* fseMetadata, 3216 + void* workspace, size_t wkspSize, 3217 + int writeEntropy) 3816 3218 { 3817 3219 size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ); 3818 3220 size_t cSeqSizeEstimate = 0; 3819 3221 cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff, 3820 - fseTables->offcodeCTable, NULL, 3821 - OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, 3822 - workspace, wkspSize); 3222 + fseTables->offcodeCTable, NULL, 3223 + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, 3224 + workspace, wkspSize); 3823 3225 cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL, 3824 - fseTables->litlengthCTable, LL_bits, 3825 - LL_defaultNorm, LL_defaultNormLog, MaxLL, 3826 - workspace, wkspSize); 3226 + fseTables->litlengthCTable, LL_bits, 3227 + LL_defaultNorm, LL_defaultNormLog, MaxLL, 3228 + workspace, wkspSize); 3827 3229 cSeqSizeEstimate += 
ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML, 3828 - fseTables->matchlengthCTable, ML_bits, 3829 - ML_defaultNorm, ML_defaultNormLog, MaxML, 3830 - workspace, wkspSize); 3230 + fseTables->matchlengthCTable, ML_bits, 3231 + ML_defaultNorm, ML_defaultNormLog, MaxML, 3232 + workspace, wkspSize); 3831 3233 if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; 3832 3234 return cSeqSizeEstimate + sequencesSectionHeaderSize; 3833 3235 } 3834 3236 3835 3237 /* Returns the size estimate for a given stream of literals, of, ll, ml */ 3836 - static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize, 3837 - const BYTE* ofCodeTable, 3838 - const BYTE* llCodeTable, 3839 - const BYTE* mlCodeTable, 3840 - size_t nbSeq, 3841 - const ZSTD_entropyCTables_t* entropy, 3842 - const ZSTD_entropyCTablesMetadata_t* entropyMetadata, 3843 - void* workspace, size_t wkspSize, 3844 - int writeLitEntropy, int writeSeqEntropy) { 3238 + static size_t 3239 + ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize, 3240 + const BYTE* ofCodeTable, 3241 + const BYTE* llCodeTable, 3242 + const BYTE* mlCodeTable, 3243 + size_t nbSeq, 3244 + const ZSTD_entropyCTables_t* entropy, 3245 + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, 3246 + void* workspace, size_t wkspSize, 3247 + int writeLitEntropy, int writeSeqEntropy) 3248 + { 3845 3249 size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize, 3846 - &entropy->huf, &entropyMetadata->hufMetadata, 3847 - workspace, wkspSize, writeLitEntropy); 3250 + &entropy->huf, &entropyMetadata->hufMetadata, 3251 + workspace, wkspSize, writeLitEntropy); 3848 3252 size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, 3849 - nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, 3850 - workspace, wkspSize, writeSeqEntropy); 3253 + nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, 3254 + workspace, wkspSize, writeSeqEntropy); 3851 3255 
return seqSize + literalsSize + ZSTD_blockHeaderSize; 3852 3256 } 3853 3257 3854 3258 /* Builds entropy statistics and uses them for blocksize estimation. 3855 3259 * 3856 - * Returns the estimated compressed size of the seqStore, or a zstd error. 3260 + * @return: estimated compressed size of the seqStore, or a zstd error. 3857 3261 */ 3858 - static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc) { 3859 - ZSTD_entropyCTablesMetadata_t* entropyMetadata = &zc->blockSplitCtx.entropyMetadata; 3262 + static size_t 3263 + ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(SeqStore_t* seqStore, ZSTD_CCtx* zc) 3264 + { 3265 + ZSTD_entropyCTablesMetadata_t* const entropyMetadata = &zc->blockSplitCtx.entropyMetadata; 3860 3266 DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()"); 3861 3267 FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore, 3862 3268 &zc->blockState.prevCBlock->entropy, 3863 3269 &zc->blockState.nextCBlock->entropy, 3864 3270 &zc->appliedParams, 3865 3271 entropyMetadata, 3866 - zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); 3867 - return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart), 3272 + zc->tmpWorkspace, zc->tmpWkspSize), ""); 3273 + return ZSTD_estimateBlockSize( 3274 + seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart), 3868 3275 seqStore->ofCode, seqStore->llCode, seqStore->mlCode, 3869 3276 (size_t)(seqStore->sequences - seqStore->sequencesStart), 3870 - &zc->blockState.nextCBlock->entropy, entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, 3277 + &zc->blockState.nextCBlock->entropy, 3278 + entropyMetadata, 3279 + zc->tmpWorkspace, zc->tmpWkspSize, 3871 3280 (int)(entropyMetadata->hufMetadata.hType == set_compressed), 1); 3872 3281 } 3873 3282 3874 3283 /* Returns literals bytes represented in a seqStore */ 3875 - static size_t ZSTD_countSeqStoreLiteralsBytes(const 
seqStore_t* const seqStore) { 3284 + static size_t ZSTD_countSeqStoreLiteralsBytes(const SeqStore_t* const seqStore) 3285 + { 3876 3286 size_t literalsBytes = 0; 3877 - size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; 3287 + size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); 3878 3288 size_t i; 3879 3289 for (i = 0; i < nbSeqs; ++i) { 3880 - seqDef seq = seqStore->sequencesStart[i]; 3290 + SeqDef const seq = seqStore->sequencesStart[i]; 3881 3291 literalsBytes += seq.litLength; 3882 3292 if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) { 3883 3293 literalsBytes += 0x10000; 3884 - } 3885 - } 3294 + } } 3886 3295 return literalsBytes; 3887 3296 } 3888 3297 3889 3298 /* Returns match bytes represented in a seqStore */ 3890 - static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) { 3299 + static size_t ZSTD_countSeqStoreMatchBytes(const SeqStore_t* const seqStore) 3300 + { 3891 3301 size_t matchBytes = 0; 3892 - size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; 3302 + size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); 3893 3303 size_t i; 3894 3304 for (i = 0; i < nbSeqs; ++i) { 3895 - seqDef seq = seqStore->sequencesStart[i]; 3305 + SeqDef seq = seqStore->sequencesStart[i]; 3896 3306 matchBytes += seq.mlBase + MINMATCH; 3897 3307 if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) { 3898 3308 matchBytes += 0x10000; 3899 - } 3900 - } 3309 + } } 3901 3310 return matchBytes; 3902 3311 } 3903 3312 3904 3313 /* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx). 3905 3314 * Stores the result in resultSeqStore. 
3906 3315 */ 3907 - static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, 3908 - const seqStore_t* originalSeqStore, 3909 - size_t startIdx, size_t endIdx) { 3910 - BYTE* const litEnd = originalSeqStore->lit; 3911 - size_t literalsBytes; 3912 - size_t literalsBytesPreceding = 0; 3913 - 3316 + static void ZSTD_deriveSeqStoreChunk(SeqStore_t* resultSeqStore, 3317 + const SeqStore_t* originalSeqStore, 3318 + size_t startIdx, size_t endIdx) 3319 + { 3914 3320 *resultSeqStore = *originalSeqStore; 3915 3321 if (startIdx > 0) { 3916 3322 resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx; 3917 - literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); 3323 + resultSeqStore->litStart += ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); 3918 3324 } 3919 3325 3920 3326 /* Move longLengthPos into the correct position if necessary */ ··· 3932 3328 } 3933 3329 resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx; 3934 3330 resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx; 3935 - literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); 3936 - resultSeqStore->litStart += literalsBytesPreceding; 3937 3331 if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) { 3938 3332 /* This accounts for possible last literals if the derived chunk reaches the end of the block */ 3939 - resultSeqStore->lit = litEnd; 3333 + assert(resultSeqStore->lit == originalSeqStore->lit); 3940 3334 } else { 3941 - resultSeqStore->lit = resultSeqStore->litStart+literalsBytes; 3335 + size_t const literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); 3336 + resultSeqStore->lit = resultSeqStore->litStart + literalsBytes; 3942 3337 } 3943 3338 resultSeqStore->llCode += startIdx; 3944 3339 resultSeqStore->mlCode += startIdx; ··· 3945 3342 } 3946 3343 3947 3344 /* 3948 - * Returns the raw offset represented by the combination of offCode, ll0, and repcode history. 
3949 - * offCode must represent a repcode in the numeric representation of ZSTD_storeSeq(). 3345 + * Returns the raw offset represented by the combination of offBase, ll0, and repcode history. 3346 + * offBase must represent a repcode in the numeric representation of ZSTD_storeSeq(). 3950 3347 */ 3951 3348 static U32 3952 - ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0) 3349 + ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offBase, const U32 ll0) 3953 3350 { 3954 - U32 const adjustedOffCode = STORED_REPCODE(offCode) - 1 + ll0; /* [ 0 - 3 ] */ 3955 - assert(STORED_IS_REPCODE(offCode)); 3956 - if (adjustedOffCode == ZSTD_REP_NUM) { 3957 - /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */ 3958 - assert(rep[0] > 0); 3351 + U32 const adjustedRepCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0; /* [ 0 - 3 ] */ 3352 + assert(OFFBASE_IS_REPCODE(offBase)); 3353 + if (adjustedRepCode == ZSTD_REP_NUM) { 3354 + assert(ll0); 3355 + /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 3356 + * This is only valid if it results in a valid offset value, aka > 0. 3357 + * Note : it may happen that `rep[0]==1` in exceptional circumstances. 3358 + * In which case this function will return 0, which is an invalid offset. 3359 + * It's not an issue though, since this value will be 3360 + * compared and discarded within ZSTD_seqStore_resolveOffCodes(). 
3361 + */ 3959 3362 return rep[0] - 1; 3960 3363 } 3961 - return rep[adjustedOffCode]; 3364 + return rep[adjustedRepCode]; 3962 3365 } 3963 3366 3964 3367 /* ··· 3980 3371 * 1-3 : repcode 1-3 3981 3372 * 4+ : real_offset+3 3982 3373 */ 3983 - static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes, 3984 - seqStore_t* const seqStore, U32 const nbSeq) { 3374 + static void 3375 + ZSTD_seqStore_resolveOffCodes(Repcodes_t* const dRepcodes, Repcodes_t* const cRepcodes, 3376 + const SeqStore_t* const seqStore, U32 const nbSeq) 3377 + { 3985 3378 U32 idx = 0; 3379 + U32 const longLitLenIdx = seqStore->longLengthType == ZSTD_llt_literalLength ? seqStore->longLengthPos : nbSeq; 3986 3380 for (; idx < nbSeq; ++idx) { 3987 - seqDef* const seq = seqStore->sequencesStart + idx; 3988 - U32 const ll0 = (seq->litLength == 0); 3989 - U32 const offCode = OFFBASE_TO_STORED(seq->offBase); 3990 - assert(seq->offBase > 0); 3991 - if (STORED_IS_REPCODE(offCode)) { 3992 - U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0); 3993 - U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0); 3381 + SeqDef* const seq = seqStore->sequencesStart + idx; 3382 + U32 const ll0 = (seq->litLength == 0) && (idx != longLitLenIdx); 3383 + U32 const offBase = seq->offBase; 3384 + assert(offBase > 0); 3385 + if (OFFBASE_IS_REPCODE(offBase)) { 3386 + U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offBase, ll0); 3387 + U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offBase, ll0); 3994 3388 /* Adjust simulated decompression repcode history if we come across a mismatch. Replace 3995 3389 * the repcode with the offset it actually references, determined by the compression 3996 3390 * repcode history. 
3997 3391 */ 3998 3392 if (dRawOffset != cRawOffset) { 3999 - seq->offBase = cRawOffset + ZSTD_REP_NUM; 3393 + seq->offBase = OFFSET_TO_OFFBASE(cRawOffset); 4000 3394 } 4001 3395 } 4002 3396 /* Compression repcode history is always updated with values directly from the unmodified seqStore. 4003 3397 * Decompression repcode history may use modified seq->offset value taken from compression repcode history. 4004 3398 */ 4005 - ZSTD_updateRep(dRepcodes->rep, OFFBASE_TO_STORED(seq->offBase), ll0); 4006 - ZSTD_updateRep(cRepcodes->rep, offCode, ll0); 3399 + ZSTD_updateRep(dRepcodes->rep, seq->offBase, ll0); 3400 + ZSTD_updateRep(cRepcodes->rep, offBase, ll0); 4007 3401 } 4008 3402 } 4009 3403 ··· 4016 3404 * Returns the total size of that block (including header) or a ZSTD error code. 4017 3405 */ 4018 3406 static size_t 4019 - ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore, 4020 - repcodes_t* const dRep, repcodes_t* const cRep, 3407 + ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, 3408 + const SeqStore_t* const seqStore, 3409 + Repcodes_t* const dRep, Repcodes_t* const cRep, 4021 3410 void* dst, size_t dstCapacity, 4022 - const void* src, size_t srcSize, 3411 + const void* src, size_t srcSize, 4023 3412 U32 lastBlock, U32 isPartition) 4024 3413 { 4025 3414 const U32 rleMaxLength = 25; ··· 4030 3417 size_t cSeqsSize; 4031 3418 4032 3419 /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */ 4033 - repcodes_t const dRepOriginal = *dRep; 3420 + Repcodes_t const dRepOriginal = *dRep; 4034 3421 DEBUGLOG(5, "ZSTD_compressSeqStore_singleBlock"); 4035 3422 if (isPartition) 4036 3423 ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart)); ··· 4041 3428 &zc->appliedParams, 4042 3429 op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, 4043 3430 srcSize, 4044 - zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, 
3431 + zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */, 4045 3432 zc->bmi2); 4046 3433 FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!"); 4047 3434 ··· 4055 3442 cSeqsSize = 1; 4056 3443 } 4057 3444 3445 + /* Sequence collection not supported when block splitting */ 4058 3446 if (zc->seqCollector.collectSequences) { 4059 - ZSTD_copyBlockSequences(zc); 3447 + FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, seqStore, dRepOriginal.rep), "copyBlockSequences failed"); 4060 3448 ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); 4061 3449 return 0; 4062 3450 } ··· 4065 3451 if (cSeqsSize == 0) { 4066 3452 cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); 4067 3453 FORWARD_IF_ERROR(cSize, "Nocompress block failed"); 4068 - DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize); 3454 + DEBUGLOG(5, "Writing out nocompress block, size: %zu", cSize); 4069 3455 *dRep = dRepOriginal; /* reset simulated decompression repcode history */ 4070 3456 } else if (cSeqsSize == 1) { 4071 3457 cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock); 4072 3458 FORWARD_IF_ERROR(cSize, "RLE compress block failed"); 4073 - DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize); 3459 + DEBUGLOG(5, "Writing out RLE block, size: %zu", cSize); 4074 3460 *dRep = dRepOriginal; /* reset simulated decompression repcode history */ 4075 3461 } else { 4076 3462 ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); 4077 3463 writeBlockHeader(op, cSeqsSize, srcSize, lastBlock); 4078 3464 cSize = ZSTD_blockHeaderSize + cSeqsSize; 4079 - DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize); 3465 + DEBUGLOG(5, "Writing out compressed block, size: %zu", cSize); 4080 3466 } 4081 3467 4082 3468 if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) ··· 4095 3481 4096 3482 /* Helper function to perform the recursive search for block splits. 
4097 3483 * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half. 4098 - * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then 4099 - * we do not recurse. 3484 + * If advantageous to split, then we recurse down the two sub-blocks. 3485 + * If not, or if an error occurred in estimation, then we do not recurse. 4100 3486 * 4101 - * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING. 3487 + * Note: The recursion depth is capped by a heuristic minimum number of sequences, 3488 + * defined by MIN_SEQUENCES_BLOCK_SPLITTING. 4102 3489 * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING). 4103 3490 * In practice, recursion depth usually doesn't go beyond 4. 4104 3491 * 4105 - * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize 3492 + * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. 3493 + * At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize 4106 3494 * maximum of 128 KB, this value is actually impossible to reach. 
4107 3495 */ 4108 3496 static void 4109 3497 ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx, 4110 - ZSTD_CCtx* zc, const seqStore_t* origSeqStore) 3498 + ZSTD_CCtx* zc, const SeqStore_t* origSeqStore) 4111 3499 { 4112 - seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk; 4113 - seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore; 4114 - seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore; 3500 + SeqStore_t* const fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk; 3501 + SeqStore_t* const firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore; 3502 + SeqStore_t* const secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore; 4115 3503 size_t estimatedOriginalSize; 4116 3504 size_t estimatedFirstHalfSize; 4117 3505 size_t estimatedSecondHalfSize; 4118 3506 size_t midIdx = (startIdx + endIdx)/2; 4119 3507 3508 + DEBUGLOG(5, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx); 3509 + assert(endIdx >= startIdx); 4120 3510 if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) { 4121 - DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences"); 3511 + DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences (%zu)", endIdx - startIdx); 4122 3512 return; 4123 3513 } 4124 - DEBUGLOG(4, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx); 4125 3514 ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx); 4126 3515 ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx); 4127 3516 ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx); 4128 3517 estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(fullSeqStoreChunk, zc); 4129 3518 estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(firstHalfSeqStore, zc); 4130 3519 estimatedSecondHalfSize = 
ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(secondHalfSeqStore, zc); 4131 - DEBUGLOG(4, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu", 3520 + DEBUGLOG(5, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu", 4132 3521 estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize); 4133 3522 if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) { 4134 3523 return; 4135 3524 } 4136 3525 if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) { 3526 + DEBUGLOG(5, "split decided at seqNb:%zu", midIdx); 4137 3527 ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore); 4138 3528 splits->splitLocations[splits->idx] = (U32)midIdx; 4139 3529 splits->idx++; ··· 4145 3527 } 4146 3528 } 4147 3529 4148 - /* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio. 3530 + /* Base recursive function. 3531 + * Populates a table with intra-block partition indices that can improve compression ratio. 4149 3532 * 4150 - * Returns the number of splits made (which equals the size of the partition table - 1). 3533 + * @return: number of splits made (which equals the size of the partition table - 1). 
4151 3534 */ 4152 - static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) { 4153 - seqStoreSplits splits = {partitions, 0}; 3535 + static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) 3536 + { 3537 + seqStoreSplits splits; 3538 + splits.splitLocations = partitions; 3539 + splits.idx = 0; 4154 3540 if (nbSeq <= 4) { 4155 - DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split"); 3541 + DEBUGLOG(5, "ZSTD_deriveBlockSplits: Too few sequences to split (%u <= 4)", nbSeq); 4156 3542 /* Refuse to try and split anything with less than 4 sequences */ 4157 3543 return 0; 4158 3544 } ··· 4172 3550 * Returns combined size of all blocks (which includes headers), or a ZSTD error code. 4173 3551 */ 4174 3552 static size_t 4175 - ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, 4176 - const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) 3553 + ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, 3554 + void* dst, size_t dstCapacity, 3555 + const void* src, size_t blockSize, 3556 + U32 lastBlock, U32 nbSeq) 4177 3557 { 4178 3558 size_t cSize = 0; 4179 3559 const BYTE* ip = (const BYTE*)src; 4180 3560 BYTE* op = (BYTE*)dst; 4181 3561 size_t i = 0; 4182 3562 size_t srcBytesTotal = 0; 4183 - U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */ 4184 - seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore; 4185 - seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore; 4186 - size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq); 3563 + U32* const partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */ 3564 + SeqStore_t* const nextSeqStore = &zc->blockSplitCtx.nextSeqStore; 3565 + SeqStore_t* const currSeqStore = &zc->blockSplitCtx.currSeqStore; 3566 + size_t const numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq); 4187 3567 4188 3568 /* If a block is split and some partitions are emitted as 
RLE/uncompressed, then repcode history 4189 3569 * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two ··· 4201 3577 * 4202 3578 * See ZSTD_seqStore_resolveOffCodes() for more details. 4203 3579 */ 4204 - repcodes_t dRep; 4205 - repcodes_t cRep; 4206 - ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); 4207 - ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); 4208 - ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t)); 3580 + Repcodes_t dRep; 3581 + Repcodes_t cRep; 3582 + ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(Repcodes_t)); 3583 + ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(Repcodes_t)); 3584 + ZSTD_memset(nextSeqStore, 0, sizeof(SeqStore_t)); 4209 3585 4210 - DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", 3586 + DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", 4211 3587 (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, 4212 3588 (unsigned)zc->blockState.matchState.nextToUpdate); 4213 3589 4214 3590 if (numSplits == 0) { 4215 - size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore, 4216 - &dRep, &cRep, 4217 - op, dstCapacity, 4218 - ip, blockSize, 4219 - lastBlock, 0 /* isPartition */); 3591 + size_t cSizeSingleBlock = 3592 + ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore, 3593 + &dRep, &cRep, 3594 + op, dstCapacity, 3595 + ip, blockSize, 3596 + lastBlock, 0 /* isPartition */); 4220 3597 FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!"); 4221 3598 DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits"); 4222 - assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); 3599 + assert(zc->blockSizeMax <= ZSTD_BLOCKSIZE_MAX); 3600 + assert(cSizeSingleBlock <= zc->blockSizeMax + ZSTD_blockHeaderSize); 4223 3601 return 
cSizeSingleBlock; 4224 3602 } 4225 3603 4226 3604 ZSTD_deriveSeqStoreChunk(currSeqStore, &zc->seqStore, 0, partitions[0]); 4227 3605 for (i = 0; i <= numSplits; ++i) { 4228 - size_t srcBytes; 4229 3606 size_t cSizeChunk; 4230 3607 U32 const lastPartition = (i == numSplits); 4231 3608 U32 lastBlockEntireSrc = 0; 4232 3609 4233 - srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore); 3610 + size_t srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore); 4234 3611 srcBytesTotal += srcBytes; 4235 3612 if (lastPartition) { 4236 3613 /* This is the final partition, need to account for possible last literals */ ··· 4246 3621 op, dstCapacity, 4247 3622 ip, srcBytes, 4248 3623 lastBlockEntireSrc, 1 /* isPartition */); 4249 - DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk); 3624 + DEBUGLOG(5, "Estimated size: %zu vs %zu : actual size", 3625 + ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk); 4250 3626 FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!"); 4251 3627 4252 3628 ip += srcBytes; ··· 4255 3629 dstCapacity -= cSizeChunk; 4256 3630 cSize += cSizeChunk; 4257 3631 *currSeqStore = *nextSeqStore; 4258 - assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); 3632 + assert(cSizeChunk <= zc->blockSizeMax + ZSTD_blockHeaderSize); 4259 3633 } 4260 - /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes 4261 - * for the next block. 3634 + /* cRep and dRep may have diverged during the compression. 3635 + * If so, we use the dRep repcodes for the next block. 
4262 3636 */ 4263 - ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t)); 3637 + ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(Repcodes_t)); 4264 3638 return cSize; 4265 3639 } 4266 3640 ··· 4269 3643 void* dst, size_t dstCapacity, 4270 3644 const void* src, size_t srcSize, U32 lastBlock) 4271 3645 { 4272 - const BYTE* ip = (const BYTE*)src; 4273 - BYTE* op = (BYTE*)dst; 4274 3646 U32 nbSeq; 4275 3647 size_t cSize; 4276 - DEBUGLOG(4, "ZSTD_compressBlock_splitBlock"); 4277 - assert(zc->appliedParams.useBlockSplitter == ZSTD_ps_enable); 3648 + DEBUGLOG(5, "ZSTD_compressBlock_splitBlock"); 3649 + assert(zc->appliedParams.postBlockSplitter == ZSTD_ps_enable); 4278 3650 4279 3651 { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); 4280 3652 FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); 4281 3653 if (bss == ZSTDbss_noCompress) { 4282 3654 if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) 4283 3655 zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; 4284 - cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); 3656 + RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block"); 3657 + cSize = ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); 4285 3658 FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); 4286 - DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block"); 3659 + DEBUGLOG(5, "ZSTD_compressBlock_splitBlock: Nocompress block"); 4287 3660 return cSize; 4288 3661 } 4289 3662 nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart); ··· 4298 3673 void* dst, size_t dstCapacity, 4299 3674 const void* src, size_t srcSize, U32 frame) 4300 3675 { 4301 - /* This the upper bound for the length of an rle block. 4302 - * This isn't the actual upper bound. Finding the real threshold 4303 - * needs further investigation. 
3676 + /* This is an estimated upper bound for the length of an rle block. 3677 + * This isn't the actual upper bound. 3678 + * Finding the real threshold needs further investigation. 4304 3679 */ 4305 3680 const U32 rleMaxLength = 25; 4306 3681 size_t cSize; ··· 4312 3687 4313 3688 { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); 4314 3689 FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); 4315 - if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } 3690 + if (bss == ZSTDbss_noCompress) { 3691 + RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block"); 3692 + cSize = 0; 3693 + goto out; 3694 + } 4316 3695 } 4317 3696 4318 3697 if (zc->seqCollector.collectSequences) { 4319 - ZSTD_copyBlockSequences(zc); 3698 + FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, ZSTD_getSeqStore(zc), zc->blockState.prevCBlock->rep), "copyBlockSequences failed"); 4320 3699 ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); 4321 3700 return 0; 4322 3701 } ··· 4331 3702 &zc->appliedParams, 4332 3703 dst, dstCapacity, 4333 3704 srcSize, 4334 - zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, 3705 + zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */, 4335 3706 zc->bmi2); 4336 3707 4337 3708 if (frame && ··· 4396 3767 * * cSize >= blockBound(srcSize): We have expanded the block too much so 4397 3768 * emit an uncompressed block. 
4398 3769 */ 4399 - { 4400 - size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); 3770 + { size_t const cSize = 3771 + ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); 4401 3772 if (cSize != ERROR(dstSize_tooSmall)) { 4402 - size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); 3773 + size_t const maxCSize = 3774 + srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); 4403 3775 FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); 4404 3776 if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) { 4405 3777 ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); ··· 4408 3778 } 4409 3779 } 4410 3780 } 4411 - } 3781 + } /* if (bss == ZSTDbss_compress)*/ 4412 3782 4413 3783 DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()"); 4414 3784 /* Superblock compression failed, attempt to emit a single no compress block. ··· 4437 3807 return cSize; 4438 3808 } 4439 3809 4440 - static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, 3810 + static void ZSTD_overflowCorrectIfNeeded(ZSTD_MatchState_t* ms, 4441 3811 ZSTD_cwksp* ws, 4442 3812 ZSTD_CCtx_params const* params, 4443 3813 void const* ip, ··· 4461 3831 } 4462 3832 } 4463 3833 3834 + #include "zstd_preSplit.h" 3835 + 3836 + static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t srcSize, size_t blockSizeMax, int splitLevel, ZSTD_strategy strat, S64 savings) 3837 + { 3838 + /* split level based on compression strategy, from `fast` to `btultra2` */ 3839 + static const int splitLevels[] = { 0, 0, 1, 2, 2, 3, 3, 4, 4, 4 }; 3840 + /* note: conservatively only split full blocks (128 KB) currently. 3841 + * While it's possible to go lower, let's keep it simple for a first implementation. 3842 + * Besides, benefits of splitting are reduced when blocks are already small. 
3843 + */ 3844 + if (srcSize < 128 KB || blockSizeMax < 128 KB) 3845 + return MIN(srcSize, blockSizeMax); 3846 + /* do not split incompressible data though: 3847 + * require verified savings to allow pre-splitting. 3848 + * Note: as a consequence, the first full block is not split. 3849 + */ 3850 + if (savings < 3) { 3851 + DEBUGLOG(6, "don't attempt splitting: savings (%i) too low", (int)savings); 3852 + return 128 KB; 3853 + } 3854 + /* apply @splitLevel, or use default value (which depends on @strat). 3855 + * note that splitting heuristic is still conditioned by @savings >= 3, 3856 + * so the first block will not reach this code path */ 3857 + if (splitLevel == 1) return 128 KB; 3858 + if (splitLevel == 0) { 3859 + assert(ZSTD_fast <= strat && strat <= ZSTD_btultra2); 3860 + splitLevel = splitLevels[strat]; 3861 + } else { 3862 + assert(2 <= splitLevel && splitLevel <= 6); 3863 + splitLevel -= 2; 3864 + } 3865 + return ZSTD_splitBlock(src, blockSizeMax, splitLevel, cctx->tmpWorkspace, cctx->tmpWkspSize); 3866 + } 3867 + 4464 3868 /*! ZSTD_compress_frameChunk() : 4465 3869 * Compress a chunk of data into one or multiple blocks. 4466 3870 * All blocks will be terminated, all input will be consumed. 4467 3871 * Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. 
4468 3872 * Frame is supposed already started (header already produced) 4469 - * @return : compressed size, or an error code 3873 + * @return : compressed size, or an error code 4470 3874 */ 4471 3875 static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx, 4472 3876 void* dst, size_t dstCapacity, 4473 3877 const void* src, size_t srcSize, 4474 3878 U32 lastFrameChunk) 4475 3879 { 4476 - size_t blockSize = cctx->blockSize; 3880 + size_t blockSizeMax = cctx->blockSizeMax; 4477 3881 size_t remaining = srcSize; 4478 3882 const BYTE* ip = (const BYTE*)src; 4479 3883 BYTE* const ostart = (BYTE*)dst; 4480 3884 BYTE* op = ostart; 4481 3885 U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; 3886 + S64 savings = (S64)cctx->consumedSrcSize - (S64)cctx->producedCSize; 4482 3887 4483 3888 assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); 4484 3889 4485 - DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); 3890 + DEBUGLOG(5, "ZSTD_compress_frameChunk (srcSize=%u, blockSizeMax=%u)", (unsigned)srcSize, (unsigned)blockSizeMax); 4486 3891 if (cctx->appliedParams.fParams.checksumFlag && srcSize) 4487 3892 xxh64_update(&cctx->xxhState, src, srcSize); 4488 3893 4489 3894 while (remaining) { 4490 - ZSTD_matchState_t* const ms = &cctx->blockState.matchState; 4491 - U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); 3895 + ZSTD_MatchState_t* const ms = &cctx->blockState.matchState; 3896 + size_t const blockSize = ZSTD_optimalBlockSize(cctx, 3897 + ip, remaining, 3898 + blockSizeMax, 3899 + cctx->appliedParams.preBlockSplitter_level, 3900 + cctx->appliedParams.cParams.strategy, 3901 + savings); 3902 + U32 const lastBlock = lastFrameChunk & (blockSize == remaining); 3903 + assert(blockSize <= remaining); 4492 3904 4493 - RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, 3905 + /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding 3906 + * additional 1. 
We need to revisit and change this logic to be more consistent */ 3907 + RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE + 1, 4494 3908 dstSize_tooSmall, 4495 3909 "not enough space to store compressed block"); 4496 - if (remaining < blockSize) blockSize = remaining; 4497 3910 4498 3911 ZSTD_overflowCorrectIfNeeded( 4499 3912 ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize); ··· 4572 3899 MEM_writeLE24(op, cBlockHeader); 4573 3900 cSize += ZSTD_blockHeaderSize; 4574 3901 } 4575 - } 3902 + } /* if (ZSTD_useTargetCBlockSize(&cctx->appliedParams))*/ 4576 3903 3904 + /* @savings is employed to ensure that splitting doesn't worsen expansion of incompressible data. 3905 + * Without splitting, the maximum expansion is 3 bytes per full block. 3906 + * An adversarial input could attempt to fudge the split detector, 3907 + * and make it split incompressible data, resulting in more block headers. 3908 + * Note that, since ZSTD_COMPRESSBOUND() assumes a worst case scenario of 1KB per block, 3909 + * and the splitter never creates blocks that small (current lower limit is 8 KB), 3910 + * there is already no risk to expand beyond ZSTD_COMPRESSBOUND() limit. 3911 + * But if the goal is to not expand by more than 3-bytes per 128 KB full block, 3912 + * then yes, it becomes possible to make the block splitter oversplit incompressible data. 3913 + * Using @savings, we enforce an even more conservative condition, 3914 + * requiring the presence of enough savings (at least 3 bytes) to authorize splitting, 3915 + * otherwise only full blocks are used. 
3916 + * But being conservative is fine, 3917 + * since splitting barely compressible blocks is not fruitful anyway */ 3918 + savings += (S64)blockSize - (S64)cSize; 4577 3919 4578 3920 ip += blockSize; 4579 3921 assert(remaining >= blockSize); ··· 4607 3919 4608 3920 4609 3921 static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, 4610 - const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID) 4611 - { BYTE* const op = (BYTE*)dst; 3922 + const ZSTD_CCtx_params* params, 3923 + U64 pledgedSrcSize, U32 dictID) 3924 + { 3925 + BYTE* const op = (BYTE*)dst; 4612 3926 U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ 4613 3927 U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ 4614 3928 U32 const checksumFlag = params->fParams.checksumFlag>0; ··· 4691 4001 } 4692 4002 } 4693 4003 4694 - size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq) 4004 + void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq) 4695 4005 { 4696 - RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong, 4697 - "wrong cctx stage"); 4698 - RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable, 4699 - parameter_unsupported, 4700 - "incompatible with ldm"); 4006 + assert(cctx->stage == ZSTDcs_init); 4007 + assert(nbSeq == 0 || cctx->appliedParams.ldmParams.enableLdm != ZSTD_ps_enable); 4701 4008 cctx->externSeqStore.seq = seq; 4702 4009 cctx->externSeqStore.size = nbSeq; 4703 4010 cctx->externSeqStore.capacity = nbSeq; 4704 4011 cctx->externSeqStore.pos = 0; 4705 4012 cctx->externSeqStore.posInSequence = 0; 4706 - return 0; 4707 4013 } 4708 4014 4709 4015 ··· 4708 4022 const void* src, size_t srcSize, 4709 4023 U32 frame, U32 lastFrameChunk) 4710 4024 { 4711 - ZSTD_matchState_t* const ms = &cctx->blockState.matchState; 4025 + ZSTD_MatchState_t* const ms = &cctx->blockState.matchState; 4712 4026 size_t fhSize = 0; 4713 4027 4714 
4028 DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u", ··· 4743 4057 src, (BYTE const*)src + srcSize); 4744 4058 } 4745 4059 4746 - DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); 4060 + DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSizeMax); 4747 4061 { size_t const cSize = frame ? 4748 4062 ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : 4749 4063 ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */); ··· 4764 4078 } 4765 4079 } 4766 4080 4767 - size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, 4768 - void* dst, size_t dstCapacity, 4769 - const void* src, size_t srcSize) 4081 + size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx, 4082 + void* dst, size_t dstCapacity, 4083 + const void* src, size_t srcSize) 4770 4084 { 4771 4085 DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize); 4772 4086 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */); 4773 4087 } 4774 4088 4089 + /* NOTE: Must just wrap ZSTD_compressContinue_public() */ 4090 + size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, 4091 + void* dst, size_t dstCapacity, 4092 + const void* src, size_t srcSize) 4093 + { 4094 + return ZSTD_compressContinue_public(cctx, dst, dstCapacity, src, srcSize); 4095 + } 4775 4096 4776 - size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) 4097 + static size_t ZSTD_getBlockSize_deprecated(const ZSTD_CCtx* cctx) 4777 4098 { 4778 4099 ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; 4779 4100 assert(!ZSTD_checkCParams(cParams)); 4780 - return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); 4101 + return MIN(cctx->appliedParams.maxBlockSize, (size_t)1 << cParams.windowLog); 4781 4102 } 4782 4103 4783 - size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) 4104 + /* NOTE: 
Must just wrap ZSTD_getBlockSize_deprecated() */ 4105 + size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) 4106 + { 4107 + return ZSTD_getBlockSize_deprecated(cctx); 4108 + } 4109 + 4110 + /* NOTE: Must just wrap ZSTD_compressBlock_deprecated() */ 4111 + size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) 4784 4112 { 4785 4113 DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); 4786 - { size_t const blockSizeMax = ZSTD_getBlockSize(cctx); 4114 + { size_t const blockSizeMax = ZSTD_getBlockSize_deprecated(cctx); 4787 4115 RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); } 4788 4116 4789 4117 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); 4790 4118 } 4791 4119 4120 + /* NOTE: Must just wrap ZSTD_compressBlock_deprecated() */ 4121 + size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) 4122 + { 4123 + return ZSTD_compressBlock_deprecated(cctx, dst, dstCapacity, src, srcSize); 4124 + } 4125 + 4792 4126 /*! 
ZSTD_loadDictionaryContent() : 4793 4127 * @return : 0, or an error code 4794 4128 */ 4795 - static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, 4796 - ldmState_t* ls, 4797 - ZSTD_cwksp* ws, 4798 - ZSTD_CCtx_params const* params, 4799 - const void* src, size_t srcSize, 4800 - ZSTD_dictTableLoadMethod_e dtlm) 4129 + static size_t 4130 + ZSTD_loadDictionaryContent(ZSTD_MatchState_t* ms, 4131 + ldmState_t* ls, 4132 + ZSTD_cwksp* ws, 4133 + ZSTD_CCtx_params const* params, 4134 + const void* src, size_t srcSize, 4135 + ZSTD_dictTableLoadMethod_e dtlm, 4136 + ZSTD_tableFillPurpose_e tfp) 4801 4137 { 4802 4138 const BYTE* ip = (const BYTE*) src; 4803 4139 const BYTE* const iend = ip + srcSize; 4804 4140 int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL; 4805 4141 4806 - /* Assert that we the ms params match the params we're being given */ 4142 + /* Assert that the ms params match the params we're being given */ 4807 4143 ZSTD_assertEqualCParams(params->cParams, ms->cParams); 4808 4144 4809 - if (srcSize > ZSTD_CHUNKSIZE_MAX) { 4145 + { /* Ensure large dictionaries can't cause index overflow */ 4146 + 4810 4147 /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX. 4811 4148 * Dictionaries right at the edge will immediately trigger overflow 4812 4149 * correction, but I don't want to insert extra constraints here. 4813 4150 */ 4814 - U32 const maxDictSize = ZSTD_CURRENT_MAX - 1; 4815 - /* We must have cleared our windows when our source is this large. 
*/ 4816 - assert(ZSTD_window_isEmpty(ms->window)); 4817 - if (loadLdmDict) 4818 - assert(ZSTD_window_isEmpty(ls->window)); 4151 + U32 maxDictSize = ZSTD_CURRENT_MAX - ZSTD_WINDOW_START_INDEX; 4152 + 4153 + int const CDictTaggedIndices = ZSTD_CDictIndicesAreTagged(&params->cParams); 4154 + if (CDictTaggedIndices && tfp == ZSTD_tfp_forCDict) { 4155 + /* Some dictionary matchfinders in zstd use "short cache", 4156 + * which treats the lower ZSTD_SHORT_CACHE_TAG_BITS of each 4157 + * CDict hashtable entry as a tag rather than as part of an index. 4158 + * When short cache is used, we need to truncate the dictionary 4159 + * so that its indices don't overlap with the tag. */ 4160 + U32 const shortCacheMaxDictSize = (1u << (32 - ZSTD_SHORT_CACHE_TAG_BITS)) - ZSTD_WINDOW_START_INDEX; 4161 + maxDictSize = MIN(maxDictSize, shortCacheMaxDictSize); 4162 + assert(!loadLdmDict); 4163 + } 4164 + 4819 4165 /* If the dictionary is too large, only load the suffix of the dictionary. */ 4820 4166 if (srcSize > maxDictSize) { 4821 4167 ip = iend - maxDictSize; ··· 4856 4138 } 4857 4139 } 4858 4140 4859 - DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder); 4141 + if (srcSize > ZSTD_CHUNKSIZE_MAX) { 4142 + /* We must have cleared our windows when our source is this large. */ 4143 + assert(ZSTD_window_isEmpty(ms->window)); 4144 + if (loadLdmDict) assert(ZSTD_window_isEmpty(ls->window)); 4145 + } 4860 4146 ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0); 4861 - ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); 4862 - ms->forceNonContiguous = params->deterministicRefPrefix; 4863 4147 4864 - if (loadLdmDict) { 4148 + DEBUGLOG(4, "ZSTD_loadDictionaryContent: useRowMatchFinder=%d", (int)params->useRowMatchFinder); 4149 + 4150 + if (loadLdmDict) { /* Load the entire dict into LDM matchfinders. 
*/ 4151 + DEBUGLOG(4, "ZSTD_loadDictionaryContent: Trigger loadLdmDict"); 4865 4152 ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0); 4866 4153 ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base); 4154 + ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams); 4155 + DEBUGLOG(4, "ZSTD_loadDictionaryContent: ZSTD_ldm_fillHashTable completes"); 4867 4156 } 4157 + 4158 + /* If the dict is larger than we can reasonably index in our tables, only load the suffix. */ 4159 + { U32 maxDictSize = 1U << MIN(MAX(params->cParams.hashLog + 3, params->cParams.chainLog + 1), 31); 4160 + if (srcSize > maxDictSize) { 4161 + ip = iend - maxDictSize; 4162 + src = ip; 4163 + srcSize = maxDictSize; 4164 + } 4165 + } 4166 + 4167 + ms->nextToUpdate = (U32)(ip - ms->window.base); 4168 + ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); 4169 + ms->forceNonContiguous = params->deterministicRefPrefix; 4868 4170 4869 4171 if (srcSize <= HASH_READ_SIZE) return 0; 4870 4172 4871 4173 ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend); 4872 4174 4873 - if (loadLdmDict) 4874 - ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams); 4875 - 4876 4175 switch(params->cParams.strategy) 4877 4176 { 4878 4177 case ZSTD_fast: 4879 - ZSTD_fillHashTable(ms, iend, dtlm); 4178 + ZSTD_fillHashTable(ms, iend, dtlm, tfp); 4880 4179 break; 4881 4180 case ZSTD_dfast: 4882 - ZSTD_fillDoubleHashTable(ms, iend, dtlm); 4181 + #ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR 4182 + ZSTD_fillDoubleHashTable(ms, iend, dtlm, tfp); 4183 + #else 4184 + assert(0); /* shouldn't be called: cparams should've been adjusted. 
*/ 4185 + #endif 4883 4186 break; 4884 4187 4885 4188 case ZSTD_greedy: 4886 4189 case ZSTD_lazy: 4887 4190 case ZSTD_lazy2: 4191 + #if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \ 4192 + || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \ 4193 + || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) 4888 4194 assert(srcSize >= HASH_READ_SIZE); 4889 4195 if (ms->dedicatedDictSearch) { 4890 4196 assert(ms->chainTable != NULL); ··· 4916 4174 } else { 4917 4175 assert(params->useRowMatchFinder != ZSTD_ps_auto); 4918 4176 if (params->useRowMatchFinder == ZSTD_ps_enable) { 4919 - size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16); 4177 + size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog); 4920 4178 ZSTD_memset(ms->tagTable, 0, tagTableSize); 4921 4179 ZSTD_row_update(ms, iend-HASH_READ_SIZE); 4922 4180 DEBUGLOG(4, "Using row-based hash table for lazy dict"); ··· 4925 4183 DEBUGLOG(4, "Using chain-based hash table for lazy dict"); 4926 4184 } 4927 4185 } 4186 + #else 4187 + assert(0); /* shouldn't be called: cparams should've been adjusted. */ 4188 + #endif 4928 4189 break; 4929 4190 4930 4191 case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ 4931 4192 case ZSTD_btopt: 4932 4193 case ZSTD_btultra: 4933 4194 case ZSTD_btultra2: 4195 + #if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \ 4196 + || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \ 4197 + || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) 4934 4198 assert(srcSize >= HASH_READ_SIZE); 4199 + DEBUGLOG(4, "Fill %u bytes into the Binary Tree", (unsigned)srcSize); 4935 4200 ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend); 4201 + #else 4202 + assert(0); /* shouldn't be called: cparams should've been adjusted. 
*/ 4203 + #endif 4936 4204 break; 4937 4205 4938 4206 default: ··· 4985 4233 { unsigned maxSymbolValue = 255; 4986 4234 unsigned hasZeroWeights = 1; 4987 4235 size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, 4988 - dictEnd-dictPtr, &hasZeroWeights); 4236 + (size_t)(dictEnd-dictPtr), &hasZeroWeights); 4989 4237 4990 4238 /* We only set the loaded table as valid if it contains all non-zero 4991 4239 * weights. Otherwise, we set it to check */ 4992 - if (!hasZeroWeights) 4240 + if (!hasZeroWeights && maxSymbolValue == 255) 4993 4241 bs->entropy.huf.repeatMode = HUF_repeat_valid; 4994 4242 4995 4243 RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, ""); 4996 - RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, ""); 4997 4244 dictPtr += hufHeaderSize; 4998 4245 } 4999 4246 5000 4247 { unsigned offcodeLog; 5001 - size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); 4248 + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr)); 5002 4249 RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); 5003 4250 RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); 5004 4251 /* fill all offset symbols to avoid garbage at end of table */ ··· 5012 4261 5013 4262 { short matchlengthNCount[MaxML+1]; 5014 4263 unsigned matchlengthMaxValue = MaxML, matchlengthLog; 5015 - size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); 4264 + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); 5016 4265 RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); 5017 4266 RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); 5018 4267 
RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( ··· 5026 4275 5027 4276 { short litlengthNCount[MaxLL+1]; 5028 4277 unsigned litlengthMaxValue = MaxLL, litlengthLog; 5029 - size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); 4278 + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); 5030 4279 RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); 5031 4280 RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); 5032 4281 RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( ··· 5060 4309 RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, ""); 5061 4310 } } } 5062 4311 5063 - return dictPtr - (const BYTE*)dict; 4312 + return (size_t)(dictPtr - (const BYTE*)dict); 5064 4313 } 5065 4314 5066 4315 /* Dictionary format : ··· 5073 4322 * dictSize supposed >= 8 5074 4323 */ 5075 4324 static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, 5076 - ZSTD_matchState_t* ms, 4325 + ZSTD_MatchState_t* ms, 5077 4326 ZSTD_cwksp* ws, 5078 4327 ZSTD_CCtx_params const* params, 5079 4328 const void* dict, size_t dictSize, 5080 4329 ZSTD_dictTableLoadMethod_e dtlm, 4330 + ZSTD_tableFillPurpose_e tfp, 5081 4331 void* workspace) 5082 4332 { 5083 4333 const BYTE* dictPtr = (const BYTE*)dict; ··· 5097 4345 { 5098 4346 size_t const dictContentSize = (size_t)(dictEnd - dictPtr); 5099 4347 FORWARD_IF_ERROR(ZSTD_loadDictionaryContent( 5100 - ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), ""); 4348 + ms, NULL, ws, params, dictPtr, dictContentSize, dtlm, tfp), ""); 5101 4349 } 5102 4350 return dictID; 5103 4351 } ··· 5106 4354 * @return : dictID, or an error code */ 5107 4355 static size_t 5108 4356 ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, 5109 - ZSTD_matchState_t* ms, 4357 + ZSTD_MatchState_t* ms, 5110 4358 ldmState_t* ls, 5111 4359 
ZSTD_cwksp* ws, 5112 4360 const ZSTD_CCtx_params* params, 5113 4361 const void* dict, size_t dictSize, 5114 4362 ZSTD_dictContentType_e dictContentType, 5115 4363 ZSTD_dictTableLoadMethod_e dtlm, 4364 + ZSTD_tableFillPurpose_e tfp, 5116 4365 void* workspace) 5117 4366 { 5118 4367 DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); ··· 5126 4373 5127 4374 /* dict restricted modes */ 5128 4375 if (dictContentType == ZSTD_dct_rawContent) 5129 - return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm); 4376 + return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm, tfp); 5130 4377 5131 4378 if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { 5132 4379 if (dictContentType == ZSTD_dct_auto) { 5133 4380 DEBUGLOG(4, "raw content dictionary detected"); 5134 4381 return ZSTD_loadDictionaryContent( 5135 - ms, ls, ws, params, dict, dictSize, dtlm); 4382 + ms, ls, ws, params, dict, dictSize, dtlm, tfp); 5136 4383 } 5137 4384 RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); 5138 4385 assert(0); /* impossible */ ··· 5140 4387 5141 4388 /* dict as full zstd dictionary */ 5142 4389 return ZSTD_loadZstdDictionary( 5143 - bs, ms, ws, params, dict, dictSize, dtlm, workspace); 4390 + bs, ms, ws, params, dict, dictSize, dtlm, tfp, workspace); 5144 4391 } 5145 4392 5146 4393 #define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB) 5147 4394 #define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL) 5148 4395 5149 4396 /*! 
ZSTD_compressBegin_internal() : 4397 + * Assumption : either @dict OR @cdict (or none) is non-NULL, never both 5150 4398 * @return : 0, or an error code */ 5151 4399 static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, 5152 4400 const void* dict, size_t dictSize, ··· 5180 4426 cctx->blockState.prevCBlock, &cctx->blockState.matchState, 5181 4427 &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent, 5182 4428 cdict->dictContentSize, cdict->dictContentType, dtlm, 5183 - cctx->entropyWorkspace) 4429 + ZSTD_tfp_forCCtx, cctx->tmpWorkspace) 5184 4430 : ZSTD_compress_insertDictionary( 5185 4431 cctx->blockState.prevCBlock, &cctx->blockState.matchState, 5186 4432 &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize, 5187 - dictContentType, dtlm, cctx->entropyWorkspace); 4433 + dictContentType, dtlm, ZSTD_tfp_forCCtx, cctx->tmpWorkspace); 5188 4434 FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); 5189 4435 assert(dictID <= UINT_MAX); 5190 4436 cctx->dictID = (U32)dictID; ··· 5225 4471 &cctxParams, pledgedSrcSize); 5226 4472 } 5227 4473 5228 - size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) 4474 + static size_t 4475 + ZSTD_compressBegin_usingDict_deprecated(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) 5229 4476 { 5230 4477 ZSTD_CCtx_params cctxParams; 5231 - { 5232 - ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict); 4478 + { ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict); 5233 4479 ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? 
ZSTD_CLEVEL_DEFAULT : compressionLevel); 5234 4480 } 5235 4481 DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); ··· 5237 4483 &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); 5238 4484 } 5239 4485 4486 + size_t 4487 + ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) 4488 + { 4489 + return ZSTD_compressBegin_usingDict_deprecated(cctx, dict, dictSize, compressionLevel); 4490 + } 4491 + 5240 4492 size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) 5241 4493 { 5242 - return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); 4494 + return ZSTD_compressBegin_usingDict_deprecated(cctx, NULL, 0, compressionLevel); 5243 4495 } 5244 4496 5245 4497 ··· 5256 4496 { 5257 4497 BYTE* const ostart = (BYTE*)dst; 5258 4498 BYTE* op = ostart; 5259 - size_t fhSize = 0; 5260 4499 5261 4500 DEBUGLOG(4, "ZSTD_writeEpilogue"); 5262 4501 RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing"); 5263 4502 5264 4503 /* special case : empty frame */ 5265 4504 if (cctx->stage == ZSTDcs_init) { 5266 - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); 4505 + size_t fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); 5267 4506 FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); 5268 4507 dstCapacity -= fhSize; 5269 4508 op += fhSize; ··· 5272 4513 if (cctx->stage != ZSTDcs_ending) { 5273 4514 /* write one last empty block, make it the "last" block */ 5274 4515 U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; 5275 - RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue"); 5276 - MEM_writeLE32(op, cBlockHeader24); 4516 + ZSTD_STATIC_ASSERT(ZSTD_BLOCKHEADERSIZE == 3); 4517 + RETURN_ERROR_IF(dstCapacity<3, dstSize_tooSmall, "no room for epilogue"); 4518 + MEM_writeLE24(op, cBlockHeader24); 5277 4519 op += ZSTD_blockHeaderSize; 5278 4520 dstCapacity -= 
ZSTD_blockHeaderSize; 5279 4521 } ··· 5288 4528 } 5289 4529 5290 4530 cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ 5291 - return op-ostart; 4531 + return (size_t)(op-ostart); 5292 4532 } 5293 4533 5294 4534 void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize) ··· 5297 4537 (void)extraCSize; 5298 4538 } 5299 4539 5300 - size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, 5301 - void* dst, size_t dstCapacity, 5302 - const void* src, size_t srcSize) 4540 + size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx, 4541 + void* dst, size_t dstCapacity, 4542 + const void* src, size_t srcSize) 5303 4543 { 5304 4544 size_t endResult; 5305 4545 size_t const cSize = ZSTD_compressContinue_internal(cctx, ··· 5321 4561 } 5322 4562 ZSTD_CCtx_trace(cctx, endResult); 5323 4563 return cSize + endResult; 4564 + } 4565 + 4566 + /* NOTE: Must just wrap ZSTD_compressEnd_public() */ 4567 + size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, 4568 + void* dst, size_t dstCapacity, 4569 + const void* src, size_t srcSize) 4570 + { 4571 + return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize); 5324 4572 } 5325 4573 5326 4574 size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, ··· 5359 4591 FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, 5360 4592 dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, 5361 4593 params, srcSize, ZSTDb_not_buffered) , ""); 5362 - return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); 4594 + return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize); 5363 4595 } 5364 4596 5365 4597 size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, ··· 5477 4709 { size_t const dictID = ZSTD_compress_insertDictionary( 5478 4710 &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace, 5479 4711 &params, cdict->dictContent, cdict->dictContentSize, 5480 - dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace); 4712 + dictContentType, ZSTD_dtlm_full, ZSTD_tfp_forCDict, cdict->entropyWorkspace); 5481 4713 FORWARD_IF_ERROR(dictID, 
"ZSTD_compress_insertDictionary failed"); 5482 4714 assert(dictID <= (size_t)(U32)-1); 5483 4715 cdict->dictID = (U32)dictID; ··· 5487 4719 return 0; 5488 4720 } 5489 4721 5490 - static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, 5491 - ZSTD_dictLoadMethod_e dictLoadMethod, 5492 - ZSTD_compressionParameters cParams, 5493 - ZSTD_paramSwitch_e useRowMatchFinder, 5494 - U32 enableDedicatedDictSearch, 5495 - ZSTD_customMem customMem) 4722 + static ZSTD_CDict* 4723 + ZSTD_createCDict_advanced_internal(size_t dictSize, 4724 + ZSTD_dictLoadMethod_e dictLoadMethod, 4725 + ZSTD_compressionParameters cParams, 4726 + ZSTD_ParamSwitch_e useRowMatchFinder, 4727 + int enableDedicatedDictSearch, 4728 + ZSTD_customMem customMem) 5496 4729 { 5497 4730 if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; 4731 + DEBUGLOG(3, "ZSTD_createCDict_advanced_internal (dictSize=%u)", (unsigned)dictSize); 5498 4732 5499 4733 { size_t const workspaceSize = 5500 4734 ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + ··· 5533 4763 { 5534 4764 ZSTD_CCtx_params cctxParams; 5535 4765 ZSTD_memset(&cctxParams, 0, sizeof(cctxParams)); 4766 + DEBUGLOG(3, "ZSTD_createCDict_advanced, dictSize=%u, mode=%u", (unsigned)dictSize, (unsigned)dictContentType); 5536 4767 ZSTD_CCtxParams_init(&cctxParams, 0); 5537 4768 cctxParams.cParams = cParams; 5538 4769 cctxParams.customMem = customMem; ··· 5554 4783 ZSTD_compressionParameters cParams; 5555 4784 ZSTD_CDict* cdict; 5556 4785 5557 - DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType); 4786 + DEBUGLOG(3, "ZSTD_createCDict_advanced2, dictSize=%u, mode=%u", (unsigned)dictSize, (unsigned)dictContentType); 5558 4787 if (!customMem.customAlloc ^ !customMem.customFree) return NULL; 5559 4788 5560 4789 if (cctxParams.enableDedicatedDictSearch) { ··· 5573 4802 &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); 5574 4803 } 5575 4804 5576 - DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", 
cctxParams.enableDedicatedDictSearch); 4805 + DEBUGLOG(3, "ZSTD_createCDict_advanced2: DedicatedDictSearch=%u", cctxParams.enableDedicatedDictSearch); 5577 4806 cctxParams.cParams = cParams; 5578 4807 cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams); 5579 4808 ··· 5581 4810 dictLoadMethod, cctxParams.cParams, 5582 4811 cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch, 5583 4812 customMem); 5584 - if (!cdict) 5585 - return NULL; 5586 4813 5587 - if (ZSTD_isError( ZSTD_initCDict_internal(cdict, 4814 + if (!cdict || ZSTD_isError( ZSTD_initCDict_internal(cdict, 5588 4815 dict, dictSize, 5589 4816 dictLoadMethod, dictContentType, 5590 4817 cctxParams) )) { ··· 5636 4867 * workspaceSize: Use ZSTD_estimateCDictSize() 5637 4868 * to determine how large workspace must be. 5638 4869 * cParams : use ZSTD_getCParams() to transform a compression level 5639 - * into its relevants cParams. 4870 + * into its relevant cParams. 5640 4871 * @return : pointer to ZSTD_CDict*, or NULL if error (size too small) 5641 4872 * Note : there is no corresponding "free" function. 5642 4873 * Since workspace was allocated externally, it must be freed externally. 
··· 5648 4879 ZSTD_dictContentType_e dictContentType, 5649 4880 ZSTD_compressionParameters cParams) 5650 4881 { 5651 - ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams); 4882 + ZSTD_ParamSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams); 5652 4883 /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */ 5653 4884 size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0); 5654 4885 size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) ··· 5659 4890 ZSTD_CDict* cdict; 5660 4891 ZSTD_CCtx_params params; 5661 4892 4893 + DEBUGLOG(4, "ZSTD_initStaticCDict (dictSize==%u)", (unsigned)dictSize); 5662 4894 if ((size_t)workspace & 7) return NULL; /* 8-aligned */ 5663 4895 5664 4896 { ··· 5670 4900 ZSTD_cwksp_move(&cdict->workspace, &ws); 5671 4901 } 5672 4902 5673 - DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", 5674 - (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); 5675 4903 if (workspaceSize < neededSize) return NULL; 5676 4904 5677 4905 ZSTD_CCtxParams_init(&params, 0); 5678 4906 params.cParams = cParams; 5679 4907 params.useRowMatchFinder = useRowMatchFinder; 5680 4908 cdict->useRowMatchFinder = useRowMatchFinder; 4909 + cdict->compressionLevel = ZSTD_NO_CLEVEL; 5681 4910 5682 4911 if (ZSTD_isError( ZSTD_initCDict_internal(cdict, 5683 4912 dict, dictSize, ··· 5756 4987 5757 4988 /* ZSTD_compressBegin_usingCDict() : 5758 4989 * cdict must be != NULL */ 5759 - size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) 4990 + size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) 5760 4991 { 5761 4992 ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; 5762 4993 return 
ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); 4994 + } 4995 + 4996 + size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) 4997 + { 4998 + return ZSTD_compressBegin_usingCDict_deprecated(cctx, cdict); 5763 4999 } 5764 5000 5765 5001 /*! ZSTD_compress_usingCDict_internal(): ··· 5776 5002 const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) 5777 5003 { 5778 5004 FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ 5779 - return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); 5005 + return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize); 5780 5006 } 5781 5007 5782 5008 /*! ZSTD_compress_usingCDict_advanced(): ··· 5842 5068 return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; 5843 5069 } 5844 5070 5845 - static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize) 5071 + static ZSTD_CParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize) 5846 5072 { 5847 5073 if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) 5848 5074 return ZSTD_cpm_attachDict; ··· 5973 5199 5974 5200 static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx) 5975 5201 { 5976 - size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; 5977 - if (hintInSize==0) hintInSize = cctx->blockSize; 5978 - return hintInSize; 5202 + if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { 5203 + return cctx->blockSizeMax - cctx->stableIn_notConsumed; 5204 + } 5205 + assert(cctx->appliedParams.inBufferMode == ZSTD_bm_buffered); 5206 + { size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; 5207 + if (hintInSize==0) hintInSize = cctx->blockSizeMax; 5208 + return hintInSize; 5209 + } 5979 5210 } 5980 5211 5981 5212 /* ZSTD_compressStream_generic(): 5982 5213 * internal 
function for all *compressStream*() variants 5983 - * non-static, because can be called from zstdmt_compress.c 5984 - * @return : hint size for next input */ 5214 + * @return : hint size for next input to complete ongoing block */ 5985 5215 static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, 5986 5216 ZSTD_outBuffer* output, 5987 5217 ZSTD_inBuffer* input, 5988 5218 ZSTD_EndDirective const flushMode) 5989 5219 { 5990 - const char* const istart = (const char*)input->src; 5991 - const char* const iend = input->size != 0 ? istart + input->size : istart; 5992 - const char* ip = input->pos != 0 ? istart + input->pos : istart; 5993 - char* const ostart = (char*)output->dst; 5994 - char* const oend = output->size != 0 ? ostart + output->size : ostart; 5995 - char* op = output->pos != 0 ? ostart + output->pos : ostart; 5220 + const char* const istart = (assert(input != NULL), (const char*)input->src); 5221 + const char* const iend = (istart != NULL) ? istart + input->size : istart; 5222 + const char* ip = (istart != NULL) ? istart + input->pos : istart; 5223 + char* const ostart = (assert(output != NULL), (char*)output->dst); 5224 + char* const oend = (ostart != NULL) ? ostart + output->size : ostart; 5225 + char* op = (ostart != NULL) ? 
ostart + output->pos : ostart; 5996 5226 U32 someMoreWork = 1; 5997 5227 5998 5228 /* check expectations */ 5999 - DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode); 5229 + DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%i, srcSize = %zu", (int)flushMode, input->size - input->pos); 5230 + assert(zcs != NULL); 5231 + if (zcs->appliedParams.inBufferMode == ZSTD_bm_stable) { 5232 + assert(input->pos >= zcs->stableIn_notConsumed); 5233 + input->pos -= zcs->stableIn_notConsumed; 5234 + if (ip) ip -= zcs->stableIn_notConsumed; 5235 + zcs->stableIn_notConsumed = 0; 5236 + } 6000 5237 if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { 6001 5238 assert(zcs->inBuff != NULL); 6002 5239 assert(zcs->inBuffSize > 0); ··· 6016 5231 assert(zcs->outBuff != NULL); 6017 5232 assert(zcs->outBuffSize > 0); 6018 5233 } 6019 - assert(output->pos <= output->size); 5234 + if (input->src == NULL) assert(input->size == 0); 6020 5235 assert(input->pos <= input->size); 5236 + if (output->dst == NULL) assert(output->size == 0); 5237 + assert(output->pos <= output->size); 6021 5238 assert((U32)flushMode <= (U32)ZSTD_e_end); 6022 5239 6023 5240 while (someMoreWork) { ··· 6030 5243 6031 5244 case zcss_load: 6032 5245 if ( (flushMode == ZSTD_e_end) 6033 - && ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip) /* Enough output space */ 5246 + && ( (size_t)(oend-op) >= ZSTD_compressBound((size_t)(iend-ip)) /* Enough output space */ 6034 5247 || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */ 6035 5248 && (zcs->inBuffPos == 0) ) { 6036 5249 /* shortcut to compression pass directly into output buffer */ 6037 - size_t const cSize = ZSTD_compressEnd(zcs, 6038 - op, oend-op, ip, iend-ip); 5250 + size_t const cSize = ZSTD_compressEnd_public(zcs, 5251 + op, (size_t)(oend-op), 5252 + ip, (size_t)(iend-ip)); 6039 5253 DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); 6040 5254 FORWARD_IF_ERROR(cSize, 
"ZSTD_compressEnd failed"); 6041 5255 ip = iend; ··· 6050 5262 size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; 6051 5263 size_t const loaded = ZSTD_limitCopy( 6052 5264 zcs->inBuff + zcs->inBuffPos, toLoad, 6053 - ip, iend-ip); 5265 + ip, (size_t)(iend-ip)); 6054 5266 zcs->inBuffPos += loaded; 6055 - if (loaded != 0) 6056 - ip += loaded; 5267 + if (ip) ip += loaded; 6057 5268 if ( (flushMode == ZSTD_e_continue) 6058 5269 && (zcs->inBuffPos < zcs->inBuffTarget) ) { 6059 5270 /* not enough input to fill full block : stop here */ ··· 6063 5276 /* empty */ 6064 5277 someMoreWork = 0; break; 6065 5278 } 5279 + } else { 5280 + assert(zcs->appliedParams.inBufferMode == ZSTD_bm_stable); 5281 + if ( (flushMode == ZSTD_e_continue) 5282 + && ( (size_t)(iend - ip) < zcs->blockSizeMax) ) { 5283 + /* can't compress a full block : stop here */ 5284 + zcs->stableIn_notConsumed = (size_t)(iend - ip); 5285 + ip = iend; /* pretend to have consumed input */ 5286 + someMoreWork = 0; break; 5287 + } 5288 + if ( (flushMode == ZSTD_e_flush) 5289 + && (ip == iend) ) { 5290 + /* empty */ 5291 + someMoreWork = 0; break; 5292 + } 6066 5293 } 6067 5294 /* compress current block (note : this stage cannot be stopped in the middle) */ 6068 5295 DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); 6069 5296 { int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered); 6070 5297 void* cDst; 6071 5298 size_t cSize; 6072 - size_t oSize = oend-op; 6073 - size_t const iSize = inputBuffered 6074 - ? zcs->inBuffPos - zcs->inToCompress 6075 - : MIN((size_t)(iend - ip), zcs->blockSize); 5299 + size_t oSize = (size_t)(oend-op); 5300 + size_t const iSize = inputBuffered ? 
zcs->inBuffPos - zcs->inToCompress 5301 + : MIN((size_t)(iend - ip), zcs->blockSizeMax); 6076 5302 if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) 6077 5303 cDst = op; /* compress into output buffer, to skip flush stage */ 6078 5304 else ··· 6093 5293 if (inputBuffered) { 6094 5294 unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); 6095 5295 cSize = lastBlock ? 6096 - ZSTD_compressEnd(zcs, cDst, oSize, 5296 + ZSTD_compressEnd_public(zcs, cDst, oSize, 6097 5297 zcs->inBuff + zcs->inToCompress, iSize) : 6098 - ZSTD_compressContinue(zcs, cDst, oSize, 5298 + ZSTD_compressContinue_public(zcs, cDst, oSize, 6099 5299 zcs->inBuff + zcs->inToCompress, iSize); 6100 5300 FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); 6101 5301 zcs->frameEnded = lastBlock; 6102 5302 /* prepare next block */ 6103 - zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; 5303 + zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSizeMax; 6104 5304 if (zcs->inBuffTarget > zcs->inBuffSize) 6105 - zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; 5305 + zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSizeMax; 6106 5306 DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", 6107 5307 (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); 6108 5308 if (!lastBlock) 6109 5309 assert(zcs->inBuffTarget <= zcs->inBuffSize); 6110 5310 zcs->inToCompress = zcs->inBuffPos; 6111 - } else { 6112 - unsigned const lastBlock = (ip + iSize == iend); 6113 - assert(flushMode == ZSTD_e_end /* Already validated */); 5311 + } else { /* !inputBuffered, hence ZSTD_bm_stable */ 5312 + unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip + iSize == iend); 6114 5313 cSize = lastBlock ? 
6115 - ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) : 6116 - ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize); 5314 + ZSTD_compressEnd_public(zcs, cDst, oSize, ip, iSize) : 5315 + ZSTD_compressContinue_public(zcs, cDst, oSize, ip, iSize); 6117 5316 /* Consume the input prior to error checking to mirror buffered mode. */ 6118 - if (iSize > 0) 6119 - ip += iSize; 5317 + if (ip) ip += iSize; 6120 5318 FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); 6121 5319 zcs->frameEnded = lastBlock; 6122 - if (lastBlock) 6123 - assert(ip == iend); 5320 + if (lastBlock) assert(ip == iend); 6124 5321 } 6125 5322 if (cDst == op) { /* no need to flush */ 6126 5323 op += cSize; ··· 6166 5369 } 6167 5370 } 6168 5371 6169 - input->pos = ip - istart; 6170 - output->pos = op - ostart; 5372 + input->pos = (size_t)(ip - istart); 5373 + output->pos = (size_t)(op - ostart); 6171 5374 if (zcs->frameEnded) return 0; 6172 5375 return ZSTD_nextInputSizeHint(zcs); 6173 5376 } ··· 6187 5390 /* After a compression call set the expected input/output buffer. 6188 5391 * This is validated at the start of the next compression call. 
6189 5392 */ 6190 - static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input) 5393 + static void 5394 + ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, const ZSTD_outBuffer* output, const ZSTD_inBuffer* input) 6191 5395 { 5396 + DEBUGLOG(5, "ZSTD_setBufferExpectations (for advanced stable in/out modes)"); 6192 5397 if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { 6193 5398 cctx->expectedInBuffer = *input; 6194 5399 } ··· 6209 5410 { 6210 5411 if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { 6211 5412 ZSTD_inBuffer const expect = cctx->expectedInBuffer; 6212 - if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size) 6213 - RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!"); 6214 - if (endOp != ZSTD_e_end) 6215 - RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!"); 5413 + if (expect.src != input->src || expect.pos != input->pos) 5414 + RETURN_ERROR(stabilityCondition_notRespected, "ZSTD_c_stableInBuffer enabled but input differs!"); 6216 5415 } 5416 + (void)endOp; 6217 5417 if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { 6218 5418 size_t const outBufferSize = output->size - output->pos; 6219 5419 if (cctx->expectedOutBufferSize != outBufferSize) 6220 - RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!"); 5420 + RETURN_ERROR(stabilityCondition_notRespected, "ZSTD_c_stableOutBuffer enabled but output size differs!"); 6221 5421 } 6222 5422 return 0; 6223 5423 } 6224 5424 5425 + /* 5426 + * If @endOp == ZSTD_e_end, @inSize becomes pledgedSrcSize. 5427 + * Otherwise, it's ignored. 5428 + * @return: 0 on success, or a ZSTD_error code otherwise. 
5429 + */ 6225 5430 static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, 6226 5431 ZSTD_EndDirective endOp, 6227 - size_t inSize) { 5432 + size_t inSize) 5433 + { 6228 5434 ZSTD_CCtx_params params = cctx->requestedParams; 6229 5435 ZSTD_prefixDict const prefixDict = cctx->prefixDict; 6230 5436 FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */ ··· 6242 5438 */ 6243 5439 params.compressionLevel = cctx->cdict->compressionLevel; 6244 5440 } 6245 - DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); 6246 - if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-fix pledgedSrcSize */ 6247 - { 6248 - size_t const dictSize = prefixDict.dict 5441 + DEBUGLOG(4, "ZSTD_CCtx_init_compressStream2 : transparent init stage"); 5442 + if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-determine pledgedSrcSize */ 5443 + 5444 + { size_t const dictSize = prefixDict.dict 6249 5445 ? prefixDict.dictSize 6250 5446 : (cctx->cdict ? 
cctx->cdict->dictContentSize : 0); 6251 - ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, cctx->pledgedSrcSizePlusOne - 1); 5447 + ZSTD_CParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, cctx->pledgedSrcSizePlusOne - 1); 6252 5448 params.cParams = ZSTD_getCParamsFromCCtxParams( 6253 5449 &params, cctx->pledgedSrcSizePlusOne-1, 6254 5450 dictSize, mode); 6255 5451 } 6256 5452 6257 - params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, &params.cParams); 5453 + params.postBlockSplitter = ZSTD_resolveBlockSplitterMode(params.postBlockSplitter, &params.cParams); 6258 5454 params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, &params.cParams); 6259 5455 params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, &params.cParams); 5456 + params.validateSequences = ZSTD_resolveExternalSequenceValidation(params.validateSequences); 5457 + params.maxBlockSize = ZSTD_resolveMaxBlockSize(params.maxBlockSize); 5458 + params.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(params.searchForExternalRepcodes, params.compressionLevel); 6260 5459 6261 5460 { U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1; 6262 5461 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); ··· 6275 5468 /* for small input: avoid automatic flush on reaching end of block, since 6276 5469 * it would require to add a 3-bytes null block to end frame 6277 5470 */ 6278 - cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize); 5471 + cctx->inBuffTarget = cctx->blockSizeMax + (cctx->blockSizeMax == pledgedSrcSize); 6279 5472 } else { 6280 5473 cctx->inBuffTarget = 0; 6281 5474 } ··· 6286 5479 return 0; 6287 5480 } 6288 5481 5482 + /* @return provides a minimum amount of data remaining to be flushed from internal buffers 5483 + */ 6289 5484 size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, 6290 5485 ZSTD_outBuffer* output, 6291 5486 ZSTD_inBuffer* input, 
··· 6302 5493 6303 5494 /* transparent initialization stage */ 6304 5495 if (cctx->streamStage == zcss_init) { 6305 - FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed"); 6306 - ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */ 5496 + size_t const inputSize = input->size - input->pos; /* no obligation to start from pos==0 */ 5497 + size_t const totalInputSize = inputSize + cctx->stableIn_notConsumed; 5498 + if ( (cctx->requestedParams.inBufferMode == ZSTD_bm_stable) /* input is presumed stable, across invocations */ 5499 + && (endOp == ZSTD_e_continue) /* no flush requested, more input to come */ 5500 + && (totalInputSize < ZSTD_BLOCKSIZE_MAX) ) { /* not even reached one block yet */ 5501 + if (cctx->stableIn_notConsumed) { /* not the first time */ 5502 + /* check stable source guarantees */ 5503 + RETURN_ERROR_IF(input->src != cctx->expectedInBuffer.src, stabilityCondition_notRespected, "stableInBuffer condition not respected: wrong src pointer"); 5504 + RETURN_ERROR_IF(input->pos != cctx->expectedInBuffer.size, stabilityCondition_notRespected, "stableInBuffer condition not respected: externally modified pos"); 5505 + } 5506 + /* pretend input was consumed, to give a sense forward progress */ 5507 + input->pos = input->size; 5508 + /* save stable inBuffer, for later control, and flush/end */ 5509 + cctx->expectedInBuffer = *input; 5510 + /* but actually input wasn't consumed, so keep track of position from where compression shall resume */ 5511 + cctx->stableIn_notConsumed += inputSize; 5512 + /* don't initialize yet, wait for the first block of flush() order, for better parameters adaptation */ 5513 + return ZSTD_FRAMEHEADERSIZE_MIN(cctx->requestedParams.format); /* at least some header to produce */ 5514 + } 5515 + FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, totalInputSize), "compressStream2 initialization failed"); 
5516 + ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */ 6307 5517 } 6308 5518 /* end of transparent initialization stage */ 6309 5519 ··· 6340 5512 const void* src, size_t srcSize, size_t* srcPos, 6341 5513 ZSTD_EndDirective endOp) 6342 5514 { 6343 - ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; 6344 - ZSTD_inBuffer input = { src, srcSize, *srcPos }; 5515 + ZSTD_outBuffer output; 5516 + ZSTD_inBuffer input; 5517 + output.dst = dst; 5518 + output.size = dstCapacity; 5519 + output.pos = *dstPos; 5520 + input.src = src; 5521 + input.size = srcSize; 5522 + input.pos = *srcPos; 6345 5523 /* ZSTD_compressStream2() will check validity of dstPos and srcPos */ 6346 - size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp); 6347 - *dstPos = output.pos; 6348 - *srcPos = input.pos; 6349 - return cErr; 5524 + { size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp); 5525 + *dstPos = output.pos; 5526 + *srcPos = input.pos; 5527 + return cErr; 5528 + } 6350 5529 } 6351 5530 6352 5531 size_t ZSTD_compress2(ZSTD_CCtx* cctx, ··· 6376 5541 /* Reset to the original values. 
*/ 6377 5542 cctx->requestedParams.inBufferMode = originalInBufferMode; 6378 5543 cctx->requestedParams.outBufferMode = originalOutBufferMode; 5544 + 6379 5545 FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed"); 6380 5546 if (result != 0) { /* compression not completed, due to lack of output space */ 6381 5547 assert(oPos == dstCapacity); ··· 6387 5551 } 6388 5552 } 6389 5553 6390 - typedef struct { 6391 - U32 idx; /* Index in array of ZSTD_Sequence */ 6392 - U32 posInSequence; /* Position within sequence at idx */ 6393 - size_t posInSrc; /* Number of bytes given by sequences provided so far */ 6394 - } ZSTD_sequencePosition; 6395 - 6396 5554 /* ZSTD_validateSequence() : 6397 - * @offCode : is presumed to follow format required by ZSTD_storeSeq() 5555 + * @offBase : must use the format required by ZSTD_storeSeq() 6398 5556 * @returns a ZSTD error code if sequence is not valid 6399 5557 */ 6400 5558 static size_t 6401 - ZSTD_validateSequence(U32 offCode, U32 matchLength, 6402 - size_t posInSrc, U32 windowLog, size_t dictSize) 5559 + ZSTD_validateSequence(U32 offBase, U32 matchLength, U32 minMatch, 5560 + size_t posInSrc, U32 windowLog, size_t dictSize, int useSequenceProducer) 6403 5561 { 6404 - U32 const windowSize = 1 << windowLog; 5562 + U32 const windowSize = 1u << windowLog; 6405 5563 /* posInSrc represents the amount of data the decoder would decode up to this point. 6406 5564 * As long as the amount of data decoded is less than or equal to window size, offsets may be 6407 5565 * larger than the total length of output decoded in order to reference the dict, even larger than 6408 5566 * window size. After output surpasses windowSize, we're limited to windowSize offsets again. 6409 5567 */ 6410 5568 size_t const offsetBound = posInSrc > windowSize ? 
(size_t)windowSize : posInSrc + (size_t)dictSize; 6411 - RETURN_ERROR_IF(offCode > STORE_OFFSET(offsetBound), corruption_detected, "Offset too large!"); 6412 - RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small"); 5569 + size_t const matchLenLowerBound = (minMatch == 3 || useSequenceProducer) ? 3 : 4; 5570 + RETURN_ERROR_IF(offBase > OFFSET_TO_OFFBASE(offsetBound), externalSequences_invalid, "Offset too large!"); 5571 + /* Validate maxNbSeq is large enough for the given matchLength and minMatch */ 5572 + RETURN_ERROR_IF(matchLength < matchLenLowerBound, externalSequences_invalid, "Matchlength too small for the minMatch"); 6413 5573 return 0; 6414 5574 } 6415 5575 6416 5576 /* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */ 6417 - static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) 5577 + static U32 ZSTD_finalizeOffBase(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) 6418 5578 { 6419 - U32 offCode = STORE_OFFSET(rawOffset); 5579 + U32 offBase = OFFSET_TO_OFFBASE(rawOffset); 6420 5580 6421 5581 if (!ll0 && rawOffset == rep[0]) { 6422 - offCode = STORE_REPCODE_1; 5582 + offBase = REPCODE1_TO_OFFBASE; 6423 5583 } else if (rawOffset == rep[1]) { 6424 - offCode = STORE_REPCODE(2 - ll0); 5584 + offBase = REPCODE_TO_OFFBASE(2 - ll0); 6425 5585 } else if (rawOffset == rep[2]) { 6426 - offCode = STORE_REPCODE(3 - ll0); 5586 + offBase = REPCODE_TO_OFFBASE(3 - ll0); 6427 5587 } else if (ll0 && rawOffset == rep[0] - 1) { 6428 - offCode = STORE_REPCODE_3; 5588 + offBase = REPCODE3_TO_OFFBASE; 6429 5589 } 6430 - return offCode; 5590 + return offBase; 6431 5591 } 6432 5592 6433 - /* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of 6434 - * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. 
5593 + /* This function scans through an array of ZSTD_Sequence, 5594 + * storing the sequences it reads, until it reaches a block delimiter. 5595 + * Note that the block delimiter includes the last literals of the block. 5596 + * @blockSize must be == sum(sequence_lengths). 5597 + * @returns @blockSize on success, and a ZSTD_error otherwise. 6435 5598 */ 6436 5599 static size_t 6437 - ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, 6438 - ZSTD_sequencePosition* seqPos, 6439 - const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, 6440 - const void* src, size_t blockSize) 5600 + ZSTD_transferSequences_wBlockDelim(ZSTD_CCtx* cctx, 5601 + ZSTD_SequencePosition* seqPos, 5602 + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, 5603 + const void* src, size_t blockSize, 5604 + ZSTD_ParamSwitch_e externalRepSearch) 6441 5605 { 6442 5606 U32 idx = seqPos->idx; 5607 + U32 const startIdx = idx; 6443 5608 BYTE const* ip = (BYTE const*)(src); 6444 5609 const BYTE* const iend = ip + blockSize; 6445 - repcodes_t updatedRepcodes; 5610 + Repcodes_t updatedRepcodes; 6446 5611 U32 dictSize; 5612 + 5613 + DEBUGLOG(5, "ZSTD_transferSequences_wBlockDelim (blockSize = %zu)", blockSize); 6447 5614 6448 5615 if (cctx->cdict) { 6449 5616 dictSize = (U32)cctx->cdict->dictContentSize; ··· 6455 5616 } else { 6456 5617 dictSize = 0; 6457 5618 } 6458 - ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); 6459 - for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) { 5619 + ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(Repcodes_t)); 5620 + for (; idx < inSeqsSize && (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0); ++idx) { 6460 5621 U32 const litLength = inSeqs[idx].litLength; 6461 - U32 const ll0 = (litLength == 0); 6462 5622 U32 const matchLength = inSeqs[idx].matchLength; 6463 - U32 const offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, 
ll0); 6464 - ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); 5623 + U32 offBase; 6465 5624 6466 - DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); 5625 + if (externalRepSearch == ZSTD_ps_disable) { 5626 + offBase = OFFSET_TO_OFFBASE(inSeqs[idx].offset); 5627 + } else { 5628 + U32 const ll0 = (litLength == 0); 5629 + offBase = ZSTD_finalizeOffBase(inSeqs[idx].offset, updatedRepcodes.rep, ll0); 5630 + ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0); 5631 + } 5632 + 5633 + DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength); 6467 5634 if (cctx->appliedParams.validateSequences) { 6468 5635 seqPos->posInSrc += litLength + matchLength; 6469 - FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, 6470 - cctx->appliedParams.cParams.windowLog, dictSize), 5636 + FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, 5637 + seqPos->posInSrc, 5638 + cctx->appliedParams.cParams.windowLog, dictSize, 5639 + ZSTD_hasExtSeqProd(&cctx->appliedParams)), 6471 5640 "Sequence validation failed"); 6472 5641 } 6473 - RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, 5642 + RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid, 6474 5643 "Not enough memory allocated. 
Try adjusting ZSTD_c_minMatch."); 6475 - ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength); 5644 + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength); 6476 5645 ip += matchLength + litLength; 6477 5646 } 6478 - ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); 5647 + RETURN_ERROR_IF(idx == inSeqsSize, externalSequences_invalid, "Block delimiter not found."); 5648 + 5649 + /* If we skipped repcode search while parsing, we need to update repcodes now */ 5650 + assert(externalRepSearch != ZSTD_ps_auto); 5651 + assert(idx >= startIdx); 5652 + if (externalRepSearch == ZSTD_ps_disable && idx != startIdx) { 5653 + U32* const rep = updatedRepcodes.rep; 5654 + U32 lastSeqIdx = idx - 1; /* index of last non-block-delimiter sequence */ 5655 + 5656 + if (lastSeqIdx >= startIdx + 2) { 5657 + rep[2] = inSeqs[lastSeqIdx - 2].offset; 5658 + rep[1] = inSeqs[lastSeqIdx - 1].offset; 5659 + rep[0] = inSeqs[lastSeqIdx].offset; 5660 + } else if (lastSeqIdx == startIdx + 1) { 5661 + rep[2] = rep[0]; 5662 + rep[1] = inSeqs[lastSeqIdx - 1].offset; 5663 + rep[0] = inSeqs[lastSeqIdx].offset; 5664 + } else { 5665 + assert(lastSeqIdx == startIdx); 5666 + rep[2] = rep[1]; 5667 + rep[1] = rep[0]; 5668 + rep[0] = inSeqs[lastSeqIdx].offset; 5669 + } 5670 + } 5671 + 5672 + ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(Repcodes_t)); 6479 5673 6480 5674 if (inSeqs[idx].litLength) { 6481 5675 DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength); ··· 6516 5644 ip += inSeqs[idx].litLength; 6517 5645 seqPos->posInSrc += inSeqs[idx].litLength; 6518 5646 } 6519 - RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!"); 5647 + RETURN_ERROR_IF(ip != iend, externalSequences_invalid, "Blocksize doesn't agree with block delimiter!"); 6520 5648 seqPos->idx = idx+1; 6521 - return 0; 5649 + return blockSize; 6522 5650 } 6523 5651 6524 - /* 
Returns the number of bytes to move the current read position back by. Only non-zero 6525 - * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something 6526 - * went wrong. 5652 + /* 5653 + * This function attempts to scan through @blockSize bytes in @src 5654 + * represented by the sequences in @inSeqs, 5655 + * storing any (partial) sequences. 6527 5656 * 6528 - * This function will attempt to scan through blockSize bytes represented by the sequences 6529 - * in inSeqs, storing any (partial) sequences. 5657 + * Occasionally, we may want to reduce the actual number of bytes consumed from @src 5658 + * to avoid splitting a match, notably if it would produce a match smaller than MINMATCH. 6530 5659 * 6531 - * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to 6532 - * avoid splitting a match, or to avoid splitting a match such that it would produce a match 6533 - * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. 5660 + * @returns the number of bytes consumed from @src, necessarily <= @blockSize. 5661 + * Otherwise, it may return a ZSTD error if something went wrong. 
6534 5662 */ 6535 5663 static size_t 6536 - ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, 6537 - const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, 6538 - const void* src, size_t blockSize) 5664 + ZSTD_transferSequences_noDelim(ZSTD_CCtx* cctx, 5665 + ZSTD_SequencePosition* seqPos, 5666 + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, 5667 + const void* src, size_t blockSize, 5668 + ZSTD_ParamSwitch_e externalRepSearch) 6539 5669 { 6540 5670 U32 idx = seqPos->idx; 6541 5671 U32 startPosInSequence = seqPos->posInSequence; 6542 5672 U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize; 6543 5673 size_t dictSize; 6544 - BYTE const* ip = (BYTE const*)(src); 6545 - BYTE const* iend = ip + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */ 6546 - repcodes_t updatedRepcodes; 5674 + const BYTE* const istart = (const BYTE*)(src); 5675 + const BYTE* ip = istart; 5676 + const BYTE* iend = istart + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */ 5677 + Repcodes_t updatedRepcodes; 6547 5678 U32 bytesAdjustment = 0; 6548 5679 U32 finalMatchSplit = 0; 5680 + 5681 + /* TODO(embg) support fast parsing mode in noBlockDelim mode */ 5682 + (void)externalRepSearch; 6549 5683 6550 5684 if (cctx->cdict) { 6551 5685 dictSize = cctx->cdict->dictContentSize; ··· 6560 5682 } else { 6561 5683 dictSize = 0; 6562 5684 } 6563 - DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize); 5685 + DEBUGLOG(5, "ZSTD_transferSequences_noDelim: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize); 6564 5686 DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); 6565 - ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); 5687 + ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, 
sizeof(Repcodes_t)); 6566 5688 while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) { 6567 5689 const ZSTD_Sequence currSeq = inSeqs[idx]; 6568 5690 U32 litLength = currSeq.litLength; 6569 5691 U32 matchLength = currSeq.matchLength; 6570 5692 U32 const rawOffset = currSeq.offset; 6571 - U32 offCode; 5693 + U32 offBase; 6572 5694 6573 5695 /* Modify the sequence depending on where endPosInSequence lies */ 6574 5696 if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) { ··· 6582 5704 /* Move to the next sequence */ 6583 5705 endPosInSequence -= currSeq.litLength + currSeq.matchLength; 6584 5706 startPosInSequence = 0; 6585 - idx++; 6586 5707 } else { 6587 5708 /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence 6588 5709 does not reach the end of the match. So, we have to split the sequence */ ··· 6621 5744 } 6622 5745 /* Check if this offset can be represented with a repcode */ 6623 5746 { U32 const ll0 = (litLength == 0); 6624 - offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0); 6625 - ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); 5747 + offBase = ZSTD_finalizeOffBase(rawOffset, updatedRepcodes.rep, ll0); 5748 + ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0); 6626 5749 } 6627 5750 6628 5751 if (cctx->appliedParams.validateSequences) { 6629 5752 seqPos->posInSrc += litLength + matchLength; 6630 - FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, 6631 - cctx->appliedParams.cParams.windowLog, dictSize), 5753 + FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc, 5754 + cctx->appliedParams.cParams.windowLog, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)), 6632 5755 "Sequence validation failed"); 6633 5756 } 6634 - DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); 6635 - RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, 
5757 + DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength); 5758 + RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid, 6636 5759 "Not enough memory allocated. Try adjusting ZSTD_c_minMatch."); 6637 - ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength); 5760 + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength); 6638 5761 ip += matchLength + litLength; 5762 + if (!finalMatchSplit) 5763 + idx++; /* Next Sequence */ 6639 5764 } 6640 5765 DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); 6641 5766 assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength); 6642 5767 seqPos->idx = idx; 6643 5768 seqPos->posInSequence = endPosInSequence; 6644 - ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); 5769 + ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(Repcodes_t)); 6645 5770 6646 5771 iend -= bytesAdjustment; 6647 5772 if (ip != iend) { 6648 5773 /* Store any last literals */ 6649 - U32 lastLLSize = (U32)(iend - ip); 5774 + U32 const lastLLSize = (U32)(iend - ip); 6650 5775 assert(ip <= iend); 6651 5776 DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize); 6652 5777 ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize); 6653 5778 seqPos->posInSrc += lastLLSize; 6654 5779 } 6655 5780 6656 - return bytesAdjustment; 5781 + return (size_t)(iend-istart); 6657 5782 } 6658 5783 6659 - typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, 6660 - const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, 6661 - const void* src, size_t blockSize); 6662 - static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) 5784 + /* @seqPos represents a position within @inSeqs, 5785 + * it is read and updated by this function, 5786 + * 
once the goal to produce a block of size @blockSize is reached. 5787 + * @return: nb of bytes consumed from @src, necessarily <= @blockSize. 5788 + */ 5789 + typedef size_t (*ZSTD_SequenceCopier_f)(ZSTD_CCtx* cctx, 5790 + ZSTD_SequencePosition* seqPos, 5791 + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, 5792 + const void* src, size_t blockSize, 5793 + ZSTD_ParamSwitch_e externalRepSearch); 5794 + 5795 + static ZSTD_SequenceCopier_f ZSTD_selectSequenceCopier(ZSTD_SequenceFormat_e mode) 6663 5796 { 6664 - ZSTD_sequenceCopier sequenceCopier = NULL; 6665 - assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode)); 5797 + assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, (int)mode)); 6666 5798 if (mode == ZSTD_sf_explicitBlockDelimiters) { 6667 - return ZSTD_copySequencesToSeqStoreExplicitBlockDelim; 6668 - } else if (mode == ZSTD_sf_noBlockDelimiters) { 6669 - return ZSTD_copySequencesToSeqStoreNoBlockDelim; 5799 + return ZSTD_transferSequences_wBlockDelim; 6670 5800 } 6671 - assert(sequenceCopier != NULL); 6672 - return sequenceCopier; 5801 + assert(mode == ZSTD_sf_noBlockDelimiters); 5802 + return ZSTD_transferSequences_noDelim; 6673 5803 } 6674 5804 6675 - /* Compress, block-by-block, all of the sequences given. 5805 + /* Discover the size of next block by searching for the delimiter. 5806 + * Note that a block delimiter **must** exist in this mode, 5807 + * otherwise it's an input error. 
5808 + * The block size retrieved will be later compared to ensure it remains within bounds */ 5809 + static size_t 5810 + blockSize_explicitDelimiter(const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_SequencePosition seqPos) 5811 + { 5812 + int end = 0; 5813 + size_t blockSize = 0; 5814 + size_t spos = seqPos.idx; 5815 + DEBUGLOG(6, "blockSize_explicitDelimiter : seq %zu / %zu", spos, inSeqsSize); 5816 + assert(spos <= inSeqsSize); 5817 + while (spos < inSeqsSize) { 5818 + end = (inSeqs[spos].offset == 0); 5819 + blockSize += inSeqs[spos].litLength + inSeqs[spos].matchLength; 5820 + if (end) { 5821 + if (inSeqs[spos].matchLength != 0) 5822 + RETURN_ERROR(externalSequences_invalid, "delimiter format error : both matchlength and offset must be == 0"); 5823 + break; 5824 + } 5825 + spos++; 5826 + } 5827 + if (!end) 5828 + RETURN_ERROR(externalSequences_invalid, "Reached end of sequences without finding a block delimiter"); 5829 + return blockSize; 5830 + } 5831 + 5832 + static size_t determine_blockSize(ZSTD_SequenceFormat_e mode, 5833 + size_t blockSize, size_t remaining, 5834 + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, 5835 + ZSTD_SequencePosition seqPos) 5836 + { 5837 + DEBUGLOG(6, "determine_blockSize : remainingSize = %zu", remaining); 5838 + if (mode == ZSTD_sf_noBlockDelimiters) { 5839 + /* Note: more a "target" block size */ 5840 + return MIN(remaining, blockSize); 5841 + } 5842 + assert(mode == ZSTD_sf_explicitBlockDelimiters); 5843 + { size_t const explicitBlockSize = blockSize_explicitDelimiter(inSeqs, inSeqsSize, seqPos); 5844 + FORWARD_IF_ERROR(explicitBlockSize, "Error while determining block size with explicit delimiters"); 5845 + if (explicitBlockSize > blockSize) 5846 + RETURN_ERROR(externalSequences_invalid, "sequences incorrectly define a too large block"); 5847 + if (explicitBlockSize > remaining) 5848 + RETURN_ERROR(externalSequences_invalid, "sequences define a frame longer than source"); 5849 + return explicitBlockSize; 5850 + } 5851 + 
} 5852 + 5853 + /* Compress all provided sequences, block-by-block. 6676 5854 * 6677 5855 * Returns the cumulative size of all compressed blocks (including their headers), 6678 5856 * otherwise a ZSTD error. ··· 6739 5807 const void* src, size_t srcSize) 6740 5808 { 6741 5809 size_t cSize = 0; 6742 - U32 lastBlock; 6743 - size_t blockSize; 6744 - size_t compressedSeqsSize; 6745 5810 size_t remaining = srcSize; 6746 - ZSTD_sequencePosition seqPos = {0, 0, 0}; 5811 + ZSTD_SequencePosition seqPos = {0, 0, 0}; 6747 5812 6748 - BYTE const* ip = (BYTE const*)src; 5813 + const BYTE* ip = (BYTE const*)src; 6749 5814 BYTE* op = (BYTE*)dst; 6750 - ZSTD_sequenceCopier const sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters); 5815 + ZSTD_SequenceCopier_f const sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters); 6751 5816 6752 5817 DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize); 6753 5818 /* Special case: empty frame */ ··· 6758 5829 } 6759 5830 6760 5831 while (remaining) { 5832 + size_t compressedSeqsSize; 6761 5833 size_t cBlockSize; 6762 - size_t additionalByteAdjustment; 6763 - lastBlock = remaining <= cctx->blockSize; 6764 - blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize; 5834 + size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters, 5835 + cctx->blockSizeMax, remaining, 5836 + inSeqs, inSeqsSize, seqPos); 5837 + U32 const lastBlock = (blockSize == remaining); 5838 + FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size"); 5839 + assert(blockSize <= remaining); 6765 5840 ZSTD_resetSeqStore(&cctx->seqStore); 6766 - DEBUGLOG(4, "Working on new block. 
Blocksize: %zu", blockSize); 6767 5841 6768 - additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize); 6769 - FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy"); 6770 - blockSize -= additionalByteAdjustment; 5842 + blockSize = sequenceCopier(cctx, 5843 + &seqPos, inSeqs, inSeqsSize, 5844 + ip, blockSize, 5845 + cctx->appliedParams.searchForExternalRepcodes); 5846 + FORWARD_IF_ERROR(blockSize, "Bad sequence copy"); 6771 5847 6772 5848 /* If blocks are too small, emit as a nocompress block */ 6773 - if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { 5849 + /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding 5850 + * additional 1. We need to revisit and change this logic to be more consistent */ 5851 + if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { 6774 5852 cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); 6775 5853 FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); 6776 - DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize); 5854 + DEBUGLOG(5, "Block too small (%zu): data remains uncompressed: cSize=%zu", blockSize, cBlockSize); 6777 5855 cSize += cBlockSize; 6778 5856 ip += blockSize; 6779 5857 op += cBlockSize; ··· 6789 5853 continue; 6790 5854 } 6791 5855 5856 + RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "not enough dstCapacity to write a new compressed block"); 6792 5857 compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore, 6793 5858 &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy, 6794 5859 &cctx->appliedParams, 6795 5860 op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize, 6796 5861 blockSize, 6797 - cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, 5862 + cctx->tmpWorkspace, cctx->tmpWkspSize /* statically allocated in resetCCtx */, 6798 
5863 cctx->bmi2); 6799 5864 FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed"); 6800 - DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize); 5865 + DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize); 6801 5866 6802 5867 if (!cctx->isFirstBlock && 6803 5868 ZSTD_maybeRLE(&cctx->seqStore) && 6804 - ZSTD_isRLE((BYTE const*)src, srcSize)) { 6805 - /* We don't want to emit our first block as a RLE even if it qualifies because 6806 - * doing so will cause the decoder (cli only) to throw a "should consume all input error." 6807 - * This is only an issue for zstd <= v1.4.3 6808 - */ 5869 + ZSTD_isRLE(ip, blockSize)) { 5870 + /* Note: don't emit the first block as RLE even if it qualifies because 5871 + * doing so will cause the decoder (cli <= v1.4.3 only) to throw an (invalid) error 5872 + * "should consume all input error." 5873 + */ 6809 5874 compressedSeqsSize = 1; 6810 5875 } 6811 5876 6812 5877 if (compressedSeqsSize == 0) { 6813 5878 /* ZSTD_noCompressBlock writes the block header as well */ 6814 5879 cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); 6815 - FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); 6816 - DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize); 5880 + FORWARD_IF_ERROR(cBlockSize, "ZSTD_noCompressBlock failed"); 5881 + DEBUGLOG(5, "Writing out nocompress block, size: %zu", cBlockSize); 6817 5882 } else if (compressedSeqsSize == 1) { 6818 5883 cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock); 6819 - FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed"); 6820 - DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize); 5884 + FORWARD_IF_ERROR(cBlockSize, "ZSTD_rleCompressBlock failed"); 5885 + DEBUGLOG(5, "Writing out RLE block, size: %zu", cBlockSize); 6821 5886 } else { 6822 5887 U32 cBlockHeader; 6823 5888 /* Error checking and repcodes update */ ··· 6830 5893 cBlockHeader = lastBlock + 
(((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3); 6831 5894 MEM_writeLE24(op, cBlockHeader); 6832 5895 cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize; 6833 - DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize); 5896 + DEBUGLOG(5, "Writing out compressed block, size: %zu", cBlockSize); 6834 5897 } 6835 5898 6836 5899 cSize += cBlockSize; 6837 - DEBUGLOG(4, "cSize running total: %zu", cSize); 6838 5900 6839 5901 if (lastBlock) { 6840 5902 break; ··· 6844 5908 dstCapacity -= cBlockSize; 6845 5909 cctx->isFirstBlock = 0; 6846 5910 } 5911 + DEBUGLOG(5, "cSize running total: %zu (remaining dstCapacity=%zu)", cSize, dstCapacity); 6847 5912 } 6848 5913 5914 + DEBUGLOG(4, "cSize final total: %zu", cSize); 6849 5915 return cSize; 6850 5916 } 6851 5917 6852 - size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity, 5918 + size_t ZSTD_compressSequences(ZSTD_CCtx* cctx, 5919 + void* dst, size_t dstCapacity, 6853 5920 const ZSTD_Sequence* inSeqs, size_t inSeqsSize, 6854 5921 const void* src, size_t srcSize) 6855 5922 { 6856 5923 BYTE* op = (BYTE*)dst; 6857 5924 size_t cSize = 0; 6858 - size_t compressedBlocksSize = 0; 6859 - size_t frameHeaderSize = 0; 6860 5925 6861 5926 /* Transparent initialization stage, same as compressStream2() */ 6862 - DEBUGLOG(3, "ZSTD_compressSequences()"); 5927 + DEBUGLOG(4, "ZSTD_compressSequences (nbSeqs=%zu,dstCapacity=%zu)", inSeqsSize, dstCapacity); 6863 5928 assert(cctx != NULL); 6864 5929 FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed"); 5930 + 6865 5931 /* Begin writing output, starting with frame header */ 6866 - frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID); 6867 - op += frameHeaderSize; 6868 - dstCapacity -= frameHeaderSize; 6869 - cSize += frameHeaderSize; 5932 + { size_t const frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, 5933 + &cctx->appliedParams, 
srcSize, cctx->dictID); 5934 + op += frameHeaderSize; 5935 + assert(frameHeaderSize <= dstCapacity); 5936 + dstCapacity -= frameHeaderSize; 5937 + cSize += frameHeaderSize; 5938 + } 6870 5939 if (cctx->appliedParams.fParams.checksumFlag && srcSize) { 6871 5940 xxh64_update(&cctx->xxhState, src, srcSize); 6872 5941 } 6873 - /* cSize includes block header size and compressed sequences size */ 6874 - compressedBlocksSize = ZSTD_compressSequences_internal(cctx, 5942 + 5943 + /* Now generate compressed blocks */ 5944 + { size_t const cBlocksSize = ZSTD_compressSequences_internal(cctx, 6875 5945 op, dstCapacity, 6876 5946 inSeqs, inSeqsSize, 6877 5947 src, srcSize); 6878 - FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!"); 6879 - cSize += compressedBlocksSize; 6880 - dstCapacity -= compressedBlocksSize; 5948 + FORWARD_IF_ERROR(cBlocksSize, "Compressing blocks failed!"); 5949 + cSize += cBlocksSize; 5950 + assert(cBlocksSize <= dstCapacity); 5951 + dstCapacity -= cBlocksSize; 5952 + } 6881 5953 5954 + /* Complete with frame checksum, if needed */ 6882 5955 if (cctx->appliedParams.fParams.checksumFlag) { 6883 5956 U32 const checksum = (U32) xxh64_digest(&cctx->xxhState); 6884 5957 RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); ··· 6896 5951 cSize += 4; 6897 5952 } 6898 5953 6899 - DEBUGLOG(3, "Final compressed size: %zu", cSize); 5954 + DEBUGLOG(4, "Final compressed size: %zu", cSize); 5955 + return cSize; 5956 + } 5957 + 5958 + 5959 + #if defined(__AVX2__) 5960 + 5961 + #include <immintrin.h> /* AVX2 intrinsics */ 5962 + 5963 + /* 5964 + * Convert 2 sequences per iteration, using AVX2 intrinsics: 5965 + * - offset -> offBase = offset + 2 5966 + * - litLength -> (U16) litLength 5967 + * - matchLength -> (U16)(matchLength - 3) 5968 + * - rep is ignored 5969 + * Store only 8 bytes per SeqDef (offBase[4], litLength[2], mlBase[2]). 
5970 + * 5971 + * At the end, instead of extracting two __m128i, 5972 + * we use _mm256_permute4x64_epi64(..., 0xE8) to move lane2 into lane1, 5973 + * then store the lower 16 bytes in one go. 5974 + * 5975 + * @returns 0 on success, with no long length detected 5976 + * @returns > 0 if there is one long length (> 65535), 5977 + * indicating the position, and type. 5978 + */ 5979 + static size_t convertSequences_noRepcodes( 5980 + SeqDef* dstSeqs, 5981 + const ZSTD_Sequence* inSeqs, 5982 + size_t nbSequences) 5983 + { 5984 + /* 5985 + * addition: 5986 + * For each 128-bit half: (offset+2, litLength+0, matchLength-3, rep+0) 5987 + */ 5988 + const __m256i addition = _mm256_setr_epi32( 5989 + ZSTD_REP_NUM, 0, -MINMATCH, 0, /* for sequence i */ 5990 + ZSTD_REP_NUM, 0, -MINMATCH, 0 /* for sequence i+1 */ 5991 + ); 5992 + 5993 + /* limit: check if there is a long length */ 5994 + const __m256i limit = _mm256_set1_epi32(65535); 5995 + 5996 + /* 5997 + * shuffle mask for byte-level rearrangement in each 128-bit half: 5998 + * 5999 + * Input layout (after addition) per 128-bit half: 6000 + * [ offset+2 (4 bytes) | litLength (4 bytes) | matchLength (4 bytes) | rep (4 bytes) ] 6001 + * We only need: 6002 + * offBase (4 bytes) = offset+2 6003 + * litLength (2 bytes) = low 2 bytes of litLength 6004 + * mlBase (2 bytes) = low 2 bytes of (matchLength) 6005 + * => Bytes [0..3, 4..5, 8..9], zero the rest. 
6006 + */ 6007 + const __m256i mask = _mm256_setr_epi8( 6008 + /* For the lower 128 bits => sequence i */ 6009 + 0, 1, 2, 3, /* offset+2 */ 6010 + 4, 5, /* litLength (16 bits) */ 6011 + 8, 9, /* matchLength (16 bits) */ 6012 + (BYTE)0x80, (BYTE)0x80, (BYTE)0x80, (BYTE)0x80, 6013 + (BYTE)0x80, (BYTE)0x80, (BYTE)0x80, (BYTE)0x80, 6014 + 6015 + /* For the upper 128 bits => sequence i+1 */ 6016 + 16,17,18,19, /* offset+2 */ 6017 + 20,21, /* litLength */ 6018 + 24,25, /* matchLength */ 6019 + (BYTE)0x80, (BYTE)0x80, (BYTE)0x80, (BYTE)0x80, 6020 + (BYTE)0x80, (BYTE)0x80, (BYTE)0x80, (BYTE)0x80 6021 + ); 6022 + 6023 + /* 6024 + * Next, we'll use _mm256_permute4x64_epi64(vshf, 0xE8). 6025 + * Explanation of 0xE8 = 11101000b => [lane0, lane2, lane2, lane3]. 6026 + * So the lower 128 bits become [lane0, lane2] => combining seq0 and seq1. 6027 + */ 6028 + #define PERM_LANE_0X_E8 0xE8 /* [0,2,2,3] in lane indices */ 6029 + 6030 + size_t longLen = 0, i = 0; 6031 + 6032 + /* AVX permutation depends on the specific definition of target structures */ 6033 + ZSTD_STATIC_ASSERT(sizeof(ZSTD_Sequence) == 16); 6034 + ZSTD_STATIC_ASSERT(offsetof(ZSTD_Sequence, offset) == 0); 6035 + ZSTD_STATIC_ASSERT(offsetof(ZSTD_Sequence, litLength) == 4); 6036 + ZSTD_STATIC_ASSERT(offsetof(ZSTD_Sequence, matchLength) == 8); 6037 + ZSTD_STATIC_ASSERT(sizeof(SeqDef) == 8); 6038 + ZSTD_STATIC_ASSERT(offsetof(SeqDef, offBase) == 0); 6039 + ZSTD_STATIC_ASSERT(offsetof(SeqDef, litLength) == 4); 6040 + ZSTD_STATIC_ASSERT(offsetof(SeqDef, mlBase) == 6); 6041 + 6042 + /* Process 2 sequences per loop iteration */ 6043 + for (; i + 1 < nbSequences; i += 2) { 6044 + /* Load 2 ZSTD_Sequence (32 bytes) */ 6045 + __m256i vin = _mm256_loadu_si256((const __m256i*)(const void*)&inSeqs[i]); 6046 + 6047 + /* Add {2, 0, -3, 0} in each 128-bit half */ 6048 + __m256i vadd = _mm256_add_epi32(vin, addition); 6049 + 6050 + /* Check for long length */ 6051 + __m256i ll_cmp = _mm256_cmpgt_epi32(vadd, limit); /* 0xFFFFFFFF for 
element > 65535 */ 6052 + int ll_res = _mm256_movemask_epi8(ll_cmp); 6053 + 6054 + /* Shuffle bytes so each half gives us the 8 bytes we need */ 6055 + __m256i vshf = _mm256_shuffle_epi8(vadd, mask); 6056 + /* 6057 + * Now: 6058 + * Lane0 = seq0's 8 bytes 6059 + * Lane1 = 0 6060 + * Lane2 = seq1's 8 bytes 6061 + * Lane3 = 0 6062 + */ 6063 + 6064 + /* Permute 64-bit lanes => move Lane2 down into Lane1. */ 6065 + __m256i vperm = _mm256_permute4x64_epi64(vshf, PERM_LANE_0X_E8); 6066 + /* 6067 + * Now the lower 16 bytes (Lane0+Lane1) = [seq0, seq1]. 6068 + * The upper 16 bytes are [Lane2, Lane3] = [seq1, 0], but we won't use them. 6069 + */ 6070 + 6071 + /* Store only the lower 16 bytes => 2 SeqDef (8 bytes each) */ 6072 + _mm_storeu_si128((__m128i *)(void*)&dstSeqs[i], _mm256_castsi256_si128(vperm)); 6073 + /* 6074 + * This writes out 16 bytes total: 6075 + * - offset 0..7 => seq0 (offBase, litLength, mlBase) 6076 + * - offset 8..15 => seq1 (offBase, litLength, mlBase) 6077 + */ 6078 + 6079 + /* check (unlikely) long lengths > 65535 6080 + * indices for lengths correspond to bits [4..7], [8..11], [20..23], [24..27] 6081 + * => combined mask = 0x0FF00FF0 6082 + */ 6083 + if (UNLIKELY((ll_res & 0x0FF00FF0) != 0)) { 6084 + /* long length detected: let's figure out which one*/ 6085 + if (inSeqs[i].matchLength > 65535+MINMATCH) { 6086 + assert(longLen == 0); 6087 + longLen = i + 1; 6088 + } 6089 + if (inSeqs[i].litLength > 65535) { 6090 + assert(longLen == 0); 6091 + longLen = i + nbSequences + 1; 6092 + } 6093 + if (inSeqs[i+1].matchLength > 65535+MINMATCH) { 6094 + assert(longLen == 0); 6095 + longLen = i + 1 + 1; 6096 + } 6097 + if (inSeqs[i+1].litLength > 65535) { 6098 + assert(longLen == 0); 6099 + longLen = i + 1 + nbSequences + 1; 6100 + } 6101 + } 6102 + } 6103 + 6104 + /* Handle leftover if @nbSequences is odd */ 6105 + if (i < nbSequences) { 6106 + /* process last sequence */ 6107 + assert(i == nbSequences - 1); 6108 + dstSeqs[i].offBase = 
OFFSET_TO_OFFBASE(inSeqs[i].offset); 6109 + dstSeqs[i].litLength = (U16)inSeqs[i].litLength; 6110 + dstSeqs[i].mlBase = (U16)(inSeqs[i].matchLength - MINMATCH); 6111 + /* check (unlikely) long lengths > 65535 */ 6112 + if (UNLIKELY(inSeqs[i].matchLength > 65535+MINMATCH)) { 6113 + assert(longLen == 0); 6114 + longLen = i + 1; 6115 + } 6116 + if (UNLIKELY(inSeqs[i].litLength > 65535)) { 6117 + assert(longLen == 0); 6118 + longLen = i + nbSequences + 1; 6119 + } 6120 + } 6121 + 6122 + return longLen; 6123 + } 6124 + 6125 + /* the vector implementation could also be ported to SSSE3, 6126 + * but since this implementation is targeting modern systems (>= Sapphire Rapids), 6127 + * it's not useful to develop and maintain code for older pre-AVX2 platforms */ 6128 + 6129 + #else /* no AVX2 */ 6130 + 6131 + static size_t convertSequences_noRepcodes( 6132 + SeqDef* dstSeqs, 6133 + const ZSTD_Sequence* inSeqs, 6134 + size_t nbSequences) 6135 + { 6136 + size_t longLen = 0; 6137 + size_t n; 6138 + for (n=0; n<nbSequences; n++) { 6139 + dstSeqs[n].offBase = OFFSET_TO_OFFBASE(inSeqs[n].offset); 6140 + dstSeqs[n].litLength = (U16)inSeqs[n].litLength; 6141 + dstSeqs[n].mlBase = (U16)(inSeqs[n].matchLength - MINMATCH); 6142 + /* check for long length > 65535 */ 6143 + if (UNLIKELY(inSeqs[n].matchLength > 65535+MINMATCH)) { 6144 + assert(longLen == 0); 6145 + longLen = n + 1; 6146 + } 6147 + if (UNLIKELY(inSeqs[n].litLength > 65535)) { 6148 + assert(longLen == 0); 6149 + longLen = n + nbSequences + 1; 6150 + } 6151 + } 6152 + return longLen; 6153 + } 6154 + 6155 + #endif 6156 + 6157 + /* 6158 + * Precondition: Sequences must end on an explicit Block Delimiter 6159 + * @return: 0 on success, or an error code. 6160 + * Note: Sequence validation functionality has been disabled (removed). 6161 + * This is helpful to generate a lean main pipeline, improving performance. 6162 + * It may be re-inserted later. 
6163 + */ 6164 + size_t ZSTD_convertBlockSequences(ZSTD_CCtx* cctx, 6165 + const ZSTD_Sequence* const inSeqs, size_t nbSequences, 6166 + int repcodeResolution) 6167 + { 6168 + Repcodes_t updatedRepcodes; 6169 + size_t seqNb = 0; 6170 + 6171 + DEBUGLOG(5, "ZSTD_convertBlockSequences (nbSequences = %zu)", nbSequences); 6172 + 6173 + RETURN_ERROR_IF(nbSequences >= cctx->seqStore.maxNbSeq, externalSequences_invalid, 6174 + "Not enough memory allocated. Try adjusting ZSTD_c_minMatch."); 6175 + 6176 + ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(Repcodes_t)); 6177 + 6178 + /* check end condition */ 6179 + assert(nbSequences >= 1); 6180 + assert(inSeqs[nbSequences-1].matchLength == 0); 6181 + assert(inSeqs[nbSequences-1].offset == 0); 6182 + 6183 + /* Convert Sequences from public format to internal format */ 6184 + if (!repcodeResolution) { 6185 + size_t const longl = convertSequences_noRepcodes(cctx->seqStore.sequencesStart, inSeqs, nbSequences-1); 6186 + cctx->seqStore.sequences = cctx->seqStore.sequencesStart + nbSequences-1; 6187 + if (longl) { 6188 + DEBUGLOG(5, "long length"); 6189 + assert(cctx->seqStore.longLengthType == ZSTD_llt_none); 6190 + if (longl <= nbSequences-1) { 6191 + DEBUGLOG(5, "long match length detected at pos %zu", longl-1); 6192 + cctx->seqStore.longLengthType = ZSTD_llt_matchLength; 6193 + cctx->seqStore.longLengthPos = (U32)(longl-1); 6194 + } else { 6195 + DEBUGLOG(5, "long literals length detected at pos %zu", longl-nbSequences); 6196 + assert(longl <= 2* (nbSequences-1)); 6197 + cctx->seqStore.longLengthType = ZSTD_llt_literalLength; 6198 + cctx->seqStore.longLengthPos = (U32)(longl-(nbSequences-1)-1); 6199 + } 6200 + } 6201 + } else { 6202 + for (seqNb = 0; seqNb < nbSequences - 1 ; seqNb++) { 6203 + U32 const litLength = inSeqs[seqNb].litLength; 6204 + U32 const matchLength = inSeqs[seqNb].matchLength; 6205 + U32 const ll0 = (litLength == 0); 6206 + U32 const offBase = 
ZSTD_finalizeOffBase(inSeqs[seqNb].offset, updatedRepcodes.rep, ll0); 6207 + 6208 + DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength); 6209 + ZSTD_storeSeqOnly(&cctx->seqStore, litLength, offBase, matchLength); 6210 + ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0); 6211 + } 6212 + } 6213 + 6214 + /* If we skipped repcode search while parsing, we need to update repcodes now */ 6215 + if (!repcodeResolution && nbSequences > 1) { 6216 + U32* const rep = updatedRepcodes.rep; 6217 + 6218 + if (nbSequences >= 4) { 6219 + U32 lastSeqIdx = (U32)nbSequences - 2; /* index of last full sequence */ 6220 + rep[2] = inSeqs[lastSeqIdx - 2].offset; 6221 + rep[1] = inSeqs[lastSeqIdx - 1].offset; 6222 + rep[0] = inSeqs[lastSeqIdx].offset; 6223 + } else if (nbSequences == 3) { 6224 + rep[2] = rep[0]; 6225 + rep[1] = inSeqs[0].offset; 6226 + rep[0] = inSeqs[1].offset; 6227 + } else { 6228 + assert(nbSequences == 2); 6229 + rep[2] = rep[1]; 6230 + rep[1] = rep[0]; 6231 + rep[0] = inSeqs[0].offset; 6232 + } 6233 + } 6234 + 6235 + ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(Repcodes_t)); 6236 + 6237 + return 0; 6238 + } 6239 + 6240 + #if defined(ZSTD_ARCH_X86_AVX2) 6241 + 6242 + BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs) 6243 + { 6244 + size_t i; 6245 + __m256i const zeroVec = _mm256_setzero_si256(); 6246 + __m256i sumVec = zeroVec; /* accumulates match+lit in 32-bit lanes */ 6247 + ZSTD_ALIGNED(32) U32 tmp[8]; /* temporary buffer for reduction */ 6248 + size_t mSum = 0, lSum = 0; 6249 + ZSTD_STATIC_ASSERT(sizeof(ZSTD_Sequence) == 16); 6250 + 6251 + /* Process 2 structs (32 bytes) at a time */ 6252 + for (i = 0; i + 2 <= nbSeqs; i += 2) { 6253 + /* Load two consecutive ZSTD_Sequence (8×4 = 32 bytes) */ 6254 + __m256i data = _mm256_loadu_si256((const __m256i*)(const void*)&seqs[i]); 6255 + /* check end of block signal */ 6256 + __m256i cmp = _mm256_cmpeq_epi32(data, zeroVec); 6257 + 
int cmp_res = _mm256_movemask_epi8(cmp); 6258 + /* indices for match lengths correspond to bits [8..11], [24..27] 6259 + * => combined mask = 0x0F000F00 */ 6260 + ZSTD_STATIC_ASSERT(offsetof(ZSTD_Sequence, matchLength) == 8); 6261 + if (cmp_res & 0x0F000F00) break; 6262 + /* Accumulate in sumVec */ 6263 + sumVec = _mm256_add_epi32(sumVec, data); 6264 + } 6265 + 6266 + /* Horizontal reduction */ 6267 + _mm256_store_si256((__m256i*)tmp, sumVec); 6268 + lSum = tmp[1] + tmp[5]; 6269 + mSum = tmp[2] + tmp[6]; 6270 + 6271 + /* Handle the leftover */ 6272 + for (; i < nbSeqs; i++) { 6273 + lSum += seqs[i].litLength; 6274 + mSum += seqs[i].matchLength; 6275 + if (seqs[i].matchLength == 0) break; /* end of block */ 6276 + } 6277 + 6278 + if (i==nbSeqs) { 6279 + /* reaching end of sequences: end of block signal was not present */ 6280 + BlockSummary bs; 6281 + bs.nbSequences = ERROR(externalSequences_invalid); 6282 + return bs; 6283 + } 6284 + { BlockSummary bs; 6285 + bs.nbSequences = i+1; 6286 + bs.blockSize = lSum + mSum; 6287 + bs.litSize = lSum; 6288 + return bs; 6289 + } 6290 + } 6291 + 6292 + #else 6293 + 6294 + BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs) 6295 + { 6296 + size_t totalMatchSize = 0; 6297 + size_t litSize = 0; 6298 + size_t n; 6299 + assert(seqs); 6300 + for (n=0; n<nbSeqs; n++) { 6301 + totalMatchSize += seqs[n].matchLength; 6302 + litSize += seqs[n].litLength; 6303 + if (seqs[n].matchLength == 0) { 6304 + assert(seqs[n].offset == 0); 6305 + break; 6306 + } 6307 + } 6308 + if (n==nbSeqs) { 6309 + BlockSummary bs; 6310 + bs.nbSequences = ERROR(externalSequences_invalid); 6311 + return bs; 6312 + } 6313 + { BlockSummary bs; 6314 + bs.nbSequences = n+1; 6315 + bs.blockSize = litSize + totalMatchSize; 6316 + bs.litSize = litSize; 6317 + return bs; 6318 + } 6319 + } 6320 + #endif 6321 + 6322 + 6323 + static size_t 6324 + ZSTD_compressSequencesAndLiterals_internal(ZSTD_CCtx* cctx, 6325 + void* dst, size_t dstCapacity, 6326 + 
const ZSTD_Sequence* inSeqs, size_t nbSequences, 6327 + const void* literals, size_t litSize, size_t srcSize) 6328 + { 6329 + size_t remaining = srcSize; 6330 + size_t cSize = 0; 6331 + BYTE* op = (BYTE*)dst; 6332 + int const repcodeResolution = (cctx->appliedParams.searchForExternalRepcodes == ZSTD_ps_enable); 6333 + assert(cctx->appliedParams.searchForExternalRepcodes != ZSTD_ps_auto); 6334 + 6335 + DEBUGLOG(4, "ZSTD_compressSequencesAndLiterals_internal: nbSeqs=%zu, litSize=%zu", nbSequences, litSize); 6336 + RETURN_ERROR_IF(nbSequences == 0, externalSequences_invalid, "Requires at least 1 end-of-block"); 6337 + 6338 + /* Special case: empty frame */ 6339 + if ((nbSequences == 1) && (inSeqs[0].litLength == 0)) { 6340 + U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1); 6341 + RETURN_ERROR_IF(dstCapacity<3, dstSize_tooSmall, "No room for empty frame block header"); 6342 + MEM_writeLE24(op, cBlockHeader24); 6343 + op += ZSTD_blockHeaderSize; 6344 + dstCapacity -= ZSTD_blockHeaderSize; 6345 + cSize += ZSTD_blockHeaderSize; 6346 + } 6347 + 6348 + while (nbSequences) { 6349 + size_t compressedSeqsSize, cBlockSize, conversionStatus; 6350 + BlockSummary const block = ZSTD_get1BlockSummary(inSeqs, nbSequences); 6351 + U32 const lastBlock = (block.nbSequences == nbSequences); 6352 + FORWARD_IF_ERROR(block.nbSequences, "Error while trying to determine nb of sequences for a block"); 6353 + assert(block.nbSequences <= nbSequences); 6354 + RETURN_ERROR_IF(block.litSize > litSize, externalSequences_invalid, "discrepancy: Sequences require more literals than present in buffer"); 6355 + ZSTD_resetSeqStore(&cctx->seqStore); 6356 + 6357 + conversionStatus = ZSTD_convertBlockSequences(cctx, 6358 + inSeqs, block.nbSequences, 6359 + repcodeResolution); 6360 + FORWARD_IF_ERROR(conversionStatus, "Bad sequence conversion"); 6361 + inSeqs += block.nbSequences; 6362 + nbSequences -= block.nbSequences; 6363 + remaining -= block.blockSize; 6364 + 6365 + /* Note: when 
blockSize is very small, other variant send it uncompressed. 6366 + * Here, we still send the sequences, because we don't have the original source to send it uncompressed. 6367 + * One could imagine in theory reproducing the source from the sequences, 6368 + * but that's complex and costly memory intensive, and goes against the objectives of this variant. */ 6369 + 6370 + RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "not enough dstCapacity to write a new compressed block"); 6371 + 6372 + compressedSeqsSize = ZSTD_entropyCompressSeqStore_internal( 6373 + op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize, 6374 + literals, block.litSize, 6375 + &cctx->seqStore, 6376 + &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy, 6377 + &cctx->appliedParams, 6378 + cctx->tmpWorkspace, cctx->tmpWkspSize /* statically allocated in resetCCtx */, 6379 + cctx->bmi2); 6380 + FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed"); 6381 + /* note: the spec forbids for any compressed block to be larger than maximum block size */ 6382 + if (compressedSeqsSize > cctx->blockSizeMax) compressedSeqsSize = 0; 6383 + DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize); 6384 + litSize -= block.litSize; 6385 + literals = (const char*)literals + block.litSize; 6386 + 6387 + /* Note: difficult to check source for RLE block when only Literals are provided, 6388 + * but it could be considered from analyzing the sequence directly */ 6389 + 6390 + if (compressedSeqsSize == 0) { 6391 + /* Sending uncompressed blocks is out of reach, because the source is not provided. 6392 + * In theory, one could use the sequences to regenerate the source, like a decompressor, 6393 + * but it's complex, and memory hungry, killing the purpose of this variant. 6394 + * Current outcome: generate an error code. 
6395 + */ 6396 + RETURN_ERROR(cannotProduce_uncompressedBlock, "ZSTD_compressSequencesAndLiterals cannot generate an uncompressed block"); 6397 + } else { 6398 + U32 cBlockHeader; 6399 + assert(compressedSeqsSize > 1); /* no RLE */ 6400 + /* Error checking and repcodes update */ 6401 + ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx->blockState); 6402 + if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) 6403 + cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; 6404 + 6405 + /* Write block header into beginning of block*/ 6406 + cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3); 6407 + MEM_writeLE24(op, cBlockHeader); 6408 + cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize; 6409 + DEBUGLOG(5, "Writing out compressed block, size: %zu", cBlockSize); 6410 + } 6411 + 6412 + cSize += cBlockSize; 6413 + op += cBlockSize; 6414 + dstCapacity -= cBlockSize; 6415 + cctx->isFirstBlock = 0; 6416 + DEBUGLOG(5, "cSize running total: %zu (remaining dstCapacity=%zu)", cSize, dstCapacity); 6417 + 6418 + if (lastBlock) { 6419 + assert(nbSequences == 0); 6420 + break; 6421 + } 6422 + } 6423 + 6424 + RETURN_ERROR_IF(litSize != 0, externalSequences_invalid, "literals must be entirely and exactly consumed"); 6425 + RETURN_ERROR_IF(remaining != 0, externalSequences_invalid, "Sequences must represent a total of exactly srcSize=%zu", srcSize); 6426 + DEBUGLOG(4, "cSize final total: %zu", cSize); 6427 + return cSize; 6428 + } 6429 + 6430 + size_t 6431 + ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx, 6432 + void* dst, size_t dstCapacity, 6433 + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, 6434 + const void* literals, size_t litSize, size_t litCapacity, 6435 + size_t decompressedSize) 6436 + { 6437 + BYTE* op = (BYTE*)dst; 6438 + size_t cSize = 0; 6439 + 6440 + /* Transparent initialization stage, same as compressStream2() */ 6441 + DEBUGLOG(4, 
"ZSTD_compressSequencesAndLiterals (dstCapacity=%zu)", dstCapacity); 6442 + assert(cctx != NULL); 6443 + if (litCapacity < litSize) { 6444 + RETURN_ERROR(workSpace_tooSmall, "literals buffer is not large enough: must be at least 8 bytes larger than litSize (risk of read out-of-bound)"); 6445 + } 6446 + FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, decompressedSize), "CCtx initialization failed"); 6447 + 6448 + if (cctx->appliedParams.blockDelimiters == ZSTD_sf_noBlockDelimiters) { 6449 + RETURN_ERROR(frameParameter_unsupported, "This mode is only compatible with explicit delimiters"); 6450 + } 6451 + if (cctx->appliedParams.validateSequences) { 6452 + RETURN_ERROR(parameter_unsupported, "This mode is not compatible with Sequence validation"); 6453 + } 6454 + if (cctx->appliedParams.fParams.checksumFlag) { 6455 + RETURN_ERROR(frameParameter_unsupported, "this mode is not compatible with frame checksum"); 6456 + } 6457 + 6458 + /* Begin writing output, starting with frame header */ 6459 + { size_t const frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, 6460 + &cctx->appliedParams, decompressedSize, cctx->dictID); 6461 + op += frameHeaderSize; 6462 + assert(frameHeaderSize <= dstCapacity); 6463 + dstCapacity -= frameHeaderSize; 6464 + cSize += frameHeaderSize; 6465 + } 6466 + 6467 + /* Now generate compressed blocks */ 6468 + { size_t const cBlocksSize = ZSTD_compressSequencesAndLiterals_internal(cctx, 6469 + op, dstCapacity, 6470 + inSeqs, inSeqsSize, 6471 + literals, litSize, decompressedSize); 6472 + FORWARD_IF_ERROR(cBlocksSize, "Compressing blocks failed!"); 6473 + cSize += cBlocksSize; 6474 + assert(cBlocksSize <= dstCapacity); 6475 + dstCapacity -= cBlocksSize; 6476 + } 6477 + 6478 + DEBUGLOG(4, "Final compressed size: %zu", cSize); 6900 6479 return cSize; 6901 6480 } 6902 6481 6903 6482 /*====== Finalize ======*/ 6904 6483 6484 + static ZSTD_inBuffer inBuffer_forEndFlush(const ZSTD_CStream* zcs) 6485 + { 6486 + const ZSTD_inBuffer 
nullInput = { NULL, 0, 0 }; 6487 + const int stableInput = (zcs->appliedParams.inBufferMode == ZSTD_bm_stable); 6488 + return stableInput ? zcs->expectedInBuffer : nullInput; 6489 + } 6490 + 6905 6491 /*! ZSTD_flushStream() : 6906 6492 * @return : amount of data remaining to flush */ 6907 6493 size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) 6908 6494 { 6909 - ZSTD_inBuffer input = { NULL, 0, 0 }; 6495 + ZSTD_inBuffer input = inBuffer_forEndFlush(zcs); 6496 + input.size = input.pos; /* do not ingest more input during flush */ 6910 6497 return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush); 6911 6498 } 6912 6499 6913 - 6914 6500 size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) 6915 6501 { 6916 - ZSTD_inBuffer input = { NULL, 0, 0 }; 6502 + ZSTD_inBuffer input = inBuffer_forEndFlush(zcs); 6917 6503 size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); 6918 - FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed"); 6504 + FORWARD_IF_ERROR(remainingToFlush , "ZSTD_compressStream2(,,ZSTD_e_end) failed"); 6919 6505 if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ 6920 6506 /* single thread mode : attempt to calculate remaining to flush more precisely */ 6921 6507 { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; ··· 7522 6046 } 7523 6047 } 7524 6048 7525 - static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) 6049 + static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode) 7526 6050 { 7527 6051 switch (mode) { 7528 6052 case ZSTD_cpm_unknown: ··· 7546 6070 * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. 7547 6071 * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. 7548 6072 * Use dictSize == 0 for unknown or unused. 7549 - * Note: `mode` controls how we treat the `dictSize`. 
See docs for `ZSTD_cParamMode_e`. */ 7550 - static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) 6073 + * Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_CParamMode_e`. */ 6074 + static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode) 7551 6075 { 7552 6076 U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode); 7553 6077 U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); ··· 7568 6092 cp.targetLength = (unsigned)(-clampedCompressionLevel); 7569 6093 } 7570 6094 /* refine parameters based on srcSize & dictSize */ 7571 - return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode); 6095 + return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode, ZSTD_ps_auto); 7572 6096 } 7573 6097 } 7574 6098 ··· 7585 6109 * same idea as ZSTD_getCParams() 7586 6110 * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). 7587 6111 * Fields of `ZSTD_frameParameters` are set to default values */ 7588 - static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) { 6112 + static ZSTD_parameters 6113 + ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode) 6114 + { 7589 6115 ZSTD_parameters params; 7590 6116 ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode); 7591 6117 DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); ··· 7601 6123 * same idea as ZSTD_getCParams() 7602 6124 * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). 
7603 6125 * Fields of `ZSTD_frameParameters` are set to default values */ 7604 - ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { 6126 + ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) 6127 + { 7605 6128 if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; 7606 6129 return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); 6130 + } 6131 + 6132 + void ZSTD_registerSequenceProducer( 6133 + ZSTD_CCtx* zc, 6134 + void* extSeqProdState, 6135 + ZSTD_sequenceProducer_F extSeqProdFunc) 6136 + { 6137 + assert(zc != NULL); 6138 + ZSTD_CCtxParams_registerSequenceProducer( 6139 + &zc->requestedParams, extSeqProdState, extSeqProdFunc 6140 + ); 6141 + } 6142 + 6143 + void ZSTD_CCtxParams_registerSequenceProducer( 6144 + ZSTD_CCtx_params* params, 6145 + void* extSeqProdState, 6146 + ZSTD_sequenceProducer_F extSeqProdFunc) 6147 + { 6148 + assert(params != NULL); 6149 + if (extSeqProdFunc != NULL) { 6150 + params->extSeqProdFunc = extSeqProdFunc; 6151 + params->extSeqProdState = extSeqProdState; 6152 + } else { 6153 + params->extSeqProdFunc = NULL; 6154 + params->extSeqProdState = NULL; 6155 + } 7607 6156 }
+429 -200
lib/zstd/compress/zstd_compress_internal.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 21 20 ***************************************/ 22 21 #include "../common/zstd_internal.h" 23 22 #include "zstd_cwksp.h" 24 - 23 + #include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */ 24 + #include "zstd_preSplit.h" /* ZSTD_SLIPBLOCK_WORKSPACESIZE */ 25 25 26 26 /*-************************************* 27 27 * Constants ··· 34 32 It's not a big deal though : candidate will just be sorted again. 35 33 Additionally, candidate position 1 will be lost. 36 34 But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. 37 - The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. 35 + The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy. 38 36 This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ 39 37 40 38 ··· 78 76 } ZSTD_entropyCTables_t; 79 77 80 78 /* ********************************************* 79 + * Sequences * 80 + ***********************************************/ 81 + typedef struct SeqDef_s { 82 + U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */ 83 + U16 litLength; 84 + U16 mlBase; /* mlBase == matchLength - MINMATCH */ 85 + } SeqDef; 86 + 87 + /* Controls whether seqStore has a single "long" litLength or matchLength. See SeqStore_t. 
*/ 88 + typedef enum { 89 + ZSTD_llt_none = 0, /* no longLengthType */ 90 + ZSTD_llt_literalLength = 1, /* represents a long literal */ 91 + ZSTD_llt_matchLength = 2 /* represents a long match */ 92 + } ZSTD_longLengthType_e; 93 + 94 + typedef struct { 95 + SeqDef* sequencesStart; 96 + SeqDef* sequences; /* ptr to end of sequences */ 97 + BYTE* litStart; 98 + BYTE* lit; /* ptr to end of literals */ 99 + BYTE* llCode; 100 + BYTE* mlCode; 101 + BYTE* ofCode; 102 + size_t maxNbSeq; 103 + size_t maxNbLit; 104 + 105 + /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength 106 + * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment 107 + * the existing value of the litLength or matchLength by 0x10000. 108 + */ 109 + ZSTD_longLengthType_e longLengthType; 110 + U32 longLengthPos; /* Index of the sequence to apply long length modification to */ 111 + } SeqStore_t; 112 + 113 + typedef struct { 114 + U32 litLength; 115 + U32 matchLength; 116 + } ZSTD_SequenceLength; 117 + 118 + /* 119 + * Returns the ZSTD_SequenceLength for the given sequences. It handles the decoding of long sequences 120 + * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength. 
121 + */ 122 + MEM_STATIC ZSTD_SequenceLength ZSTD_getSequenceLength(SeqStore_t const* seqStore, SeqDef const* seq) 123 + { 124 + ZSTD_SequenceLength seqLen; 125 + seqLen.litLength = seq->litLength; 126 + seqLen.matchLength = seq->mlBase + MINMATCH; 127 + if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { 128 + if (seqStore->longLengthType == ZSTD_llt_literalLength) { 129 + seqLen.litLength += 0x10000; 130 + } 131 + if (seqStore->longLengthType == ZSTD_llt_matchLength) { 132 + seqLen.matchLength += 0x10000; 133 + } 134 + } 135 + return seqLen; 136 + } 137 + 138 + const SeqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ 139 + int ZSTD_seqToCodes(const SeqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ 140 + 141 + 142 + /* ********************************************* 81 143 * Entropy buffer statistics structs and funcs * 82 144 ***********************************************/ 83 145 /* ZSTD_hufCTablesMetadata_t : ··· 150 84 * hufDesSize refers to the size of huffman tree description in bytes. 151 85 * This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */ 152 86 typedef struct { 153 - symbolEncodingType_e hType; 87 + SymbolEncodingType_e hType; 154 88 BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; 155 89 size_t hufDesSize; 156 90 } ZSTD_hufCTablesMetadata_t; ··· 161 95 * fseTablesSize refers to the size of fse tables in bytes. 162 96 * This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */ 163 97 typedef struct { 164 - symbolEncodingType_e llType; 165 - symbolEncodingType_e ofType; 166 - symbolEncodingType_e mlType; 98 + SymbolEncodingType_e llType; 99 + SymbolEncodingType_e ofType; 100 + SymbolEncodingType_e mlType; 167 101 BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; 168 102 size_t fseTablesSize; 169 103 size_t lastCountSize; /* This is to account for bug in 1.3.4. 
More detail in ZSTD_entropyCompressSeqStore_internal() */ ··· 177 111 /* ZSTD_buildBlockEntropyStats() : 178 112 * Builds entropy for the block. 179 113 * @return : 0 on success or error code */ 180 - size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, 181 - const ZSTD_entropyCTables_t* prevEntropy, 182 - ZSTD_entropyCTables_t* nextEntropy, 183 - const ZSTD_CCtx_params* cctxParams, 184 - ZSTD_entropyCTablesMetadata_t* entropyMetadata, 185 - void* workspace, size_t wkspSize); 114 + size_t ZSTD_buildBlockEntropyStats( 115 + const SeqStore_t* seqStorePtr, 116 + const ZSTD_entropyCTables_t* prevEntropy, 117 + ZSTD_entropyCTables_t* nextEntropy, 118 + const ZSTD_CCtx_params* cctxParams, 119 + ZSTD_entropyCTablesMetadata_t* entropyMetadata, 120 + void* workspace, size_t wkspSize); 186 121 187 122 /* ******************************* 188 123 * Compression internals structs * ··· 207 140 stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */ 208 141 size_t size; /* The number of sequences. <= capacity. 
*/ 209 142 size_t capacity; /* The capacity starting from `seq` pointer */ 210 - } rawSeqStore_t; 143 + } RawSeqStore_t; 211 144 212 - UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; 145 + UNUSED_ATTR static const RawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; 213 146 214 147 typedef struct { 215 - int price; 216 - U32 off; 217 - U32 mlen; 218 - U32 litlen; 219 - U32 rep[ZSTD_REP_NUM]; 148 + int price; /* price from beginning of segment to this position */ 149 + U32 off; /* offset of previous match */ 150 + U32 mlen; /* length of previous match */ 151 + U32 litlen; /* nb of literals since previous match */ 152 + U32 rep[ZSTD_REP_NUM]; /* offset history after previous match */ 220 153 } ZSTD_optimal_t; 221 154 222 155 typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e; 223 156 157 + #define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+3) 224 158 typedef struct { 225 159 /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ 226 160 unsigned* litFreq; /* table of literals statistics, of size 256 */ 227 161 unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ 228 162 unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ 229 163 unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ 230 - ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */ 231 - ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */ 164 + ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_SIZE */ 165 + ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_SIZE */ 232 166 233 167 U32 litSum; /* nb of literals */ 234 168 U32 litLengthSum; /* nb of litLength codes */ ··· 241 173 U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ 242 174 ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ 
243 175 const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ 244 - ZSTD_paramSwitch_e literalCompressionMode; 176 + ZSTD_ParamSwitch_e literalCompressionMode; 245 177 } optState_t; 246 178 247 179 typedef struct { ··· 263 195 264 196 #define ZSTD_WINDOW_START_INDEX 2 265 197 266 - typedef struct ZSTD_matchState_t ZSTD_matchState_t; 198 + typedef struct ZSTD_MatchState_t ZSTD_MatchState_t; 267 199 268 200 #define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */ 269 201 270 - struct ZSTD_matchState_t { 202 + struct ZSTD_MatchState_t { 271 203 ZSTD_window_t window; /* State for window round buffer management */ 272 204 U32 loadedDictEnd; /* index of end of dictionary, within context's referential. 273 205 * When loadedDictEnd != 0, a dictionary is in use, and still valid. ··· 280 212 U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ 281 213 282 214 U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/ 283 - U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */ 215 + BYTE* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */ 284 216 U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */ 217 + U64 hashSalt; /* For row-based matchFinder: salts the hash for reuse of tag table */ 218 + U32 hashSaltEntropy; /* For row-based matchFinder: collects entropy for salt generation */ 285 219 286 220 U32* hashTable; 287 221 U32* hashTable3; 288 222 U32* chainTable; 289 223 290 - U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */ 224 + int forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. 
*/ 291 225 292 226 int dedicatedDictSearch; /* Indicates whether this matchState is using the 293 227 * dedicated dictionary search structure. 294 228 */ 295 229 optState_t opt; /* optimal parser state */ 296 - const ZSTD_matchState_t* dictMatchState; 230 + const ZSTD_MatchState_t* dictMatchState; 297 231 ZSTD_compressionParameters cParams; 298 - const rawSeqStore_t* ldmSeqStore; 232 + const RawSeqStore_t* ldmSeqStore; 233 + 234 + /* Controls prefetching in some dictMatchState matchfinders. 235 + * This behavior is controlled from the cctx ms. 236 + * This parameter has no effect in the cdict ms. */ 237 + int prefetchCDictTables; 238 + 239 + /* When == 0, lazy match finders insert every position. 240 + * When != 0, lazy match finders only insert positions they search. 241 + * This allows them to skip much faster over incompressible data, 242 + * at a small cost to compression ratio. 243 + */ 244 + int lazySkipping; 299 245 }; 300 246 301 247 typedef struct { 302 248 ZSTD_compressedBlockState_t* prevCBlock; 303 249 ZSTD_compressedBlockState_t* nextCBlock; 304 - ZSTD_matchState_t matchState; 250 + ZSTD_MatchState_t matchState; 305 251 } ZSTD_blockState_t; 306 252 307 253 typedef struct { ··· 342 260 } ldmState_t; 343 261 344 262 typedef struct { 345 - ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */ 263 + ZSTD_ParamSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. 
ZSTD_ps_auto by default */ 346 264 U32 hashLog; /* Log size of hashTable */ 347 265 U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ 348 266 U32 minMatchLength; /* Minimum match length */ ··· 373 291 * There is no guarantee that hint is close to actual source size */ 374 292 375 293 ZSTD_dictAttachPref_e attachDictPref; 376 - ZSTD_paramSwitch_e literalCompressionMode; 294 + ZSTD_ParamSwitch_e literalCompressionMode; 377 295 378 296 /* Multithreading: used to pass parameters to mtctx */ 379 297 int nbWorkers; ··· 392 310 ZSTD_bufferMode_e outBufferMode; 393 311 394 312 /* Sequence compression API */ 395 - ZSTD_sequenceFormat_e blockDelimiters; 313 + ZSTD_SequenceFormat_e blockDelimiters; 396 314 int validateSequences; 397 315 398 - /* Block splitting */ 399 - ZSTD_paramSwitch_e useBlockSplitter; 316 + /* Block splitting 317 + * @postBlockSplitter executes split analysis after sequences are produced, 318 + * it's more accurate but consumes more resources. 319 + * @preBlockSplitter_level splits before knowing sequences, 320 + * it's more approximative but also cheaper. 321 + * Valid @preBlockSplitter_level values range from 0 to 6 (included). 322 + * 0 means auto, 1 means do not split, 323 + * then levels are sorted in increasing cpu budget, from 2 (fastest) to 6 (slowest). 324 + * Highest @preBlockSplitter_level combines well with @postBlockSplitter. 325 + */ 326 + ZSTD_ParamSwitch_e postBlockSplitter; 327 + int preBlockSplitter_level; 328 + 329 + /* Adjust the max block size*/ 330 + size_t maxBlockSize; 400 331 401 332 /* Param for deciding whether to use row-based matchfinder */ 402 - ZSTD_paramSwitch_e useRowMatchFinder; 333 + ZSTD_ParamSwitch_e useRowMatchFinder; 403 334 404 335 /* Always load a dictionary in ext-dict mode (not prefix mode)? 
*/ 405 336 int deterministicRefPrefix; 406 337 407 338 /* Internal use, for createCCtxParams() and freeCCtxParams() only */ 408 339 ZSTD_customMem customMem; 340 + 341 + /* Controls prefetching in some dictMatchState matchfinders */ 342 + ZSTD_ParamSwitch_e prefetchCDictTables; 343 + 344 + /* Controls whether zstd will fall back to an internal matchfinder 345 + * if the external matchfinder returns an error code. */ 346 + int enableMatchFinderFallback; 347 + 348 + /* Parameters for the external sequence producer API. 349 + * Users set these parameters through ZSTD_registerSequenceProducer(). 350 + * It is not possible to set these parameters individually through the public API. */ 351 + void* extSeqProdState; 352 + ZSTD_sequenceProducer_F extSeqProdFunc; 353 + 354 + /* Controls repcode search in external sequence parsing */ 355 + ZSTD_ParamSwitch_e searchForExternalRepcodes; 409 356 }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ 410 357 411 358 #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2)) 412 359 #define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE) 360 + #define TMP_WORKSPACE_SIZE (MAX(ENTROPY_WORKSPACE_SIZE, ZSTD_SLIPBLOCK_WORKSPACESIZE)) 413 361 414 362 /* 415 363 * Indicates whether this compression proceeds directly from user-provided ··· 457 345 */ 458 346 #define ZSTD_MAX_NB_BLOCK_SPLITS 196 459 347 typedef struct { 460 - seqStore_t fullSeqStoreChunk; 461 - seqStore_t firstHalfSeqStore; 462 - seqStore_t secondHalfSeqStore; 463 - seqStore_t currSeqStore; 464 - seqStore_t nextSeqStore; 348 + SeqStore_t fullSeqStoreChunk; 349 + SeqStore_t firstHalfSeqStore; 350 + SeqStore_t secondHalfSeqStore; 351 + SeqStore_t currSeqStore; 352 + SeqStore_t nextSeqStore; 465 353 466 354 U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS]; 467 355 ZSTD_entropyCTablesMetadata_t entropyMetadata; ··· 478 366 size_t dictContentSize; 479 367 480 368 ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */ 481 - 
size_t blockSize; 369 + size_t blockSizeMax; 482 370 unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */ 483 371 unsigned long long consumedSrcSize; 484 372 unsigned long long producedCSize; ··· 490 378 int isFirstBlock; 491 379 int initialized; 492 380 493 - seqStore_t seqStore; /* sequences storage ptrs */ 381 + SeqStore_t seqStore; /* sequences storage ptrs */ 494 382 ldmState_t ldmState; /* long distance matching state */ 495 383 rawSeq* ldmSequences; /* Storage for the ldm output sequences */ 496 384 size_t maxNbLdmSequences; 497 - rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */ 385 + RawSeqStore_t externSeqStore; /* Mutable reference to external sequences */ 498 386 ZSTD_blockState_t blockState; 499 - U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */ 387 + void* tmpWorkspace; /* used as substitute of stack space - must be aligned for S64 type */ 388 + size_t tmpWkspSize; 500 389 501 390 /* Whether we are streaming or not */ 502 391 ZSTD_buffered_policy_e bufferedPolicy; ··· 517 404 518 405 /* Stable in/out buffer verification */ 519 406 ZSTD_inBuffer expectedInBuffer; 407 + size_t stableIn_notConsumed; /* nb bytes within stable input buffer that are said to be consumed but are not */ 520 408 size_t expectedOutBufferSize; 521 409 522 410 /* Dictionary */ ··· 531 417 532 418 /* Workspace for block splitter */ 533 419 ZSTD_blockSplitCtx blockSplitCtx; 420 + 421 + /* Buffer for output from external sequence producer */ 422 + ZSTD_Sequence* extSeqBuf; 423 + size_t extSeqBufCapacity; 534 424 }; 535 425 536 426 typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; 427 + typedef enum { ZSTD_tfp_forCCtx, ZSTD_tfp_forCDict } ZSTD_tableFillPurpose_e; 537 428 538 429 typedef enum { 539 430 ZSTD_noDict = 0, ··· 560 441 * In this mode we take both the source size and the dictionary size 561 442 * into account when selecting and adjusting the parameters. 
562 443 */ 563 - ZSTD_cpm_unknown = 3, /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams. 444 + ZSTD_cpm_unknown = 3 /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams. 564 445 * We don't know what these parameters are for. We default to the legacy 565 446 * behavior of taking both the source size and the dict size into account 566 447 * when selecting and adjusting parameters. 567 448 */ 568 - } ZSTD_cParamMode_e; 449 + } ZSTD_CParamMode_e; 569 450 570 - typedef size_t (*ZSTD_blockCompressor) ( 571 - ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 451 + typedef size_t (*ZSTD_BlockCompressor_f) ( 452 + ZSTD_MatchState_t* bs, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 572 453 void const* src, size_t srcSize); 573 - ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode); 454 + ZSTD_BlockCompressor_f ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_ParamSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode); 574 455 575 456 576 457 MEM_STATIC U32 ZSTD_LLcode(U32 litLength) ··· 616 497 return 1; 617 498 } 618 499 500 + /* ZSTD_selectAddr: 501 + * @return index >= lowLimit ? candidate : backup, 502 + * tries to force branchless codegen. */ 503 + MEM_STATIC const BYTE* 504 + ZSTD_selectAddr(U32 index, U32 lowLimit, const BYTE* candidate, const BYTE* backup) 505 + { 506 + #if defined(__x86_64__) 507 + __asm__ ( 508 + "cmp %1, %2\n" 509 + "cmova %3, %0\n" 510 + : "+r"(candidate) 511 + : "r"(index), "r"(lowLimit), "r"(backup) 512 + ); 513 + return candidate; 514 + #else 515 + return index >= lowLimit ? candidate : backup; 516 + #endif 517 + } 518 + 619 519 /* ZSTD_noCompressBlock() : 620 520 * Writes uncompressed block to dst buffer from given src. 
621 521 * Returns the size of the block */ 622 - MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) 522 + MEM_STATIC size_t 523 + ZSTD_noCompressBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) 623 524 { 624 525 U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); 526 + DEBUGLOG(5, "ZSTD_noCompressBlock (srcSize=%zu, dstCapacity=%zu)", srcSize, dstCapacity); 625 527 RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, 626 528 dstSize_tooSmall, "dst buf too small for uncompressed block"); 627 529 MEM_writeLE24(dst, cBlockHeader24); ··· 650 510 return ZSTD_blockHeaderSize + srcSize; 651 511 } 652 512 653 - MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) 513 + MEM_STATIC size_t 514 + ZSTD_rleCompressBlock(void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) 654 515 { 655 516 BYTE* const op = (BYTE*)dst; 656 517 U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3); ··· 670 529 { 671 530 U32 const minlog = (strat>=ZSTD_btultra) ? 
(U32)(strat) - 1 : 6; 672 531 ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); 673 - assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); 532 + assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat)); 674 533 return (srcSize >> minlog) + 2; 675 534 } 676 535 ··· 706 565 while (ip < iend) *op++ = *ip++; 707 566 } 708 567 709 - #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) 710 - #define STORE_REPCODE_1 STORE_REPCODE(1) 711 - #define STORE_REPCODE_2 STORE_REPCODE(2) 712 - #define STORE_REPCODE_3 STORE_REPCODE(3) 713 - #define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1) 714 - #define STORE_OFFSET(o) (assert((o)>0), o + ZSTD_REP_MOVE) 715 - #define STORED_IS_OFFSET(o) ((o) > ZSTD_REP_MOVE) 716 - #define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE) 717 - #define STORED_OFFSET(o) (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE) 718 - #define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1) /* returns ID 1,2,3 */ 719 - #define STORED_TO_OFFBASE(o) ((o)+1) 720 - #define OFFBASE_TO_STORED(o) ((o)-1) 721 568 722 - /*! ZSTD_storeSeq() : 723 - * Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t. 724 - * @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET(). 569 + #define REPCODE1_TO_OFFBASE REPCODE_TO_OFFBASE(1) 570 + #define REPCODE2_TO_OFFBASE REPCODE_TO_OFFBASE(2) 571 + #define REPCODE3_TO_OFFBASE REPCODE_TO_OFFBASE(3) 572 + #define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r)) /* accepts IDs 1,2,3 */ 573 + #define OFFSET_TO_OFFBASE(o) (assert((o)>0), o + ZSTD_REP_NUM) 574 + #define OFFBASE_IS_OFFSET(o) ((o) > ZSTD_REP_NUM) 575 + #define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM) 576 + #define OFFBASE_TO_OFFSET(o) (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM) 577 + #define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o)) /* returns ID 1,2,3 */ 578 + 579 + /*! 
ZSTD_storeSeqOnly() : 580 + * Store a sequence (litlen, litPtr, offBase and matchLength) into SeqStore_t. 581 + * Literals themselves are not copied, but @litPtr is updated. 582 + * @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE(). 725 583 * @matchLength : must be >= MINMATCH 726 - * Allowed to overread literals up to litLimit. 727 584 */ 728 585 HINT_INLINE UNUSED_ATTR void 729 - ZSTD_storeSeq(seqStore_t* seqStorePtr, 730 - size_t litLength, const BYTE* literals, const BYTE* litLimit, 731 - U32 offBase_minus1, 586 + ZSTD_storeSeqOnly(SeqStore_t* seqStorePtr, 587 + size_t litLength, 588 + U32 offBase, 732 589 size_t matchLength) 733 590 { 734 - BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; 735 - BYTE const* const litEnd = literals + litLength; 736 - #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6) 737 - static const BYTE* g_start = NULL; 738 - if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ 739 - { U32 const pos = (U32)((const BYTE*)literals - g_start); 740 - DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", 741 - pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1); 742 - } 743 - #endif 744 591 assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); 745 - /* copy Literals */ 746 - assert(seqStorePtr->maxNbLit <= 128 KB); 747 - assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); 748 - assert(literals + litLength <= litLimit); 749 - if (litEnd <= litLimit_w) { 750 - /* Common case we can use wildcopy. 751 - * First copy 16 bytes, because literals are likely short. 
752 - */ 753 - assert(WILDCOPY_OVERLENGTH >= 16); 754 - ZSTD_copy16(seqStorePtr->lit, literals); 755 - if (litLength > 16) { 756 - ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); 757 - } 758 - } else { 759 - ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w); 760 - } 761 - seqStorePtr->lit += litLength; 762 592 763 593 /* literal Length */ 764 - if (litLength>0xFFFF) { 594 + assert(litLength <= ZSTD_BLOCKSIZE_MAX); 595 + if (UNLIKELY(litLength>0xFFFF)) { 765 596 assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ 766 597 seqStorePtr->longLengthType = ZSTD_llt_literalLength; 767 598 seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); ··· 741 628 seqStorePtr->sequences[0].litLength = (U16)litLength; 742 629 743 630 /* match offset */ 744 - seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1); 631 + seqStorePtr->sequences[0].offBase = offBase; 745 632 746 633 /* match Length */ 634 + assert(matchLength <= ZSTD_BLOCKSIZE_MAX); 747 635 assert(matchLength >= MINMATCH); 748 636 { size_t const mlBase = matchLength - MINMATCH; 749 - if (mlBase>0xFFFF) { 637 + if (UNLIKELY(mlBase>0xFFFF)) { 750 638 assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ 751 639 seqStorePtr->longLengthType = ZSTD_llt_matchLength; 752 640 seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); ··· 758 644 seqStorePtr->sequences++; 759 645 } 760 646 647 + /*! ZSTD_storeSeq() : 648 + * Store a sequence (litlen, litPtr, offBase and matchLength) into SeqStore_t. 649 + * @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE(). 650 + * @matchLength : must be >= MINMATCH 651 + * Allowed to over-read literals up to litLimit. 
652 + */ 653 + HINT_INLINE UNUSED_ATTR void 654 + ZSTD_storeSeq(SeqStore_t* seqStorePtr, 655 + size_t litLength, const BYTE* literals, const BYTE* litLimit, 656 + U32 offBase, 657 + size_t matchLength) 658 + { 659 + BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; 660 + BYTE const* const litEnd = literals + litLength; 661 + #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6) 662 + static const BYTE* g_start = NULL; 663 + if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ 664 + { U32 const pos = (U32)((const BYTE*)literals - g_start); 665 + DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offBase%7u", 666 + pos, (U32)litLength, (U32)matchLength, (U32)offBase); 667 + } 668 + #endif 669 + assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); 670 + /* copy Literals */ 671 + assert(seqStorePtr->maxNbLit <= 128 KB); 672 + assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); 673 + assert(literals + litLength <= litLimit); 674 + if (litEnd <= litLimit_w) { 675 + /* Common case we can use wildcopy. 676 + * First copy 16 bytes, because literals are likely short. 
677 + */ 678 + ZSTD_STATIC_ASSERT(WILDCOPY_OVERLENGTH >= 16); 679 + ZSTD_copy16(seqStorePtr->lit, literals); 680 + if (litLength > 16) { 681 + ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); 682 + } 683 + } else { 684 + ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w); 685 + } 686 + seqStorePtr->lit += litLength; 687 + 688 + ZSTD_storeSeqOnly(seqStorePtr, litLength, offBase, matchLength); 689 + } 690 + 761 691 /* ZSTD_updateRep() : 762 692 * updates in-place @rep (array of repeat offsets) 763 - * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq() 693 + * @offBase : sum-type, using numeric representation of ZSTD_storeSeq() 764 694 */ 765 695 MEM_STATIC void 766 - ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) 696 + ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0) 767 697 { 768 - if (STORED_IS_OFFSET(offBase_minus1)) { /* full offset */ 698 + if (OFFBASE_IS_OFFSET(offBase)) { /* full offset */ 769 699 rep[2] = rep[1]; 770 700 rep[1] = rep[0]; 771 - rep[0] = STORED_OFFSET(offBase_minus1); 701 + rep[0] = OFFBASE_TO_OFFSET(offBase); 772 702 } else { /* repcode */ 773 - U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0; 703 + U32 const repCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0; 774 704 if (repCode > 0) { /* note : if repCode==0, no change */ 775 705 U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; 776 706 rep[2] = (repCode >= 2) ? 
rep[1] : rep[2]; ··· 828 670 829 671 typedef struct repcodes_s { 830 672 U32 rep[3]; 831 - } repcodes_t; 673 + } Repcodes_t; 832 674 833 - MEM_STATIC repcodes_t 834 - ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) 675 + MEM_STATIC Repcodes_t 676 + ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0) 835 677 { 836 - repcodes_t newReps; 678 + Repcodes_t newReps; 837 679 ZSTD_memcpy(&newReps, rep, sizeof(newReps)); 838 - ZSTD_updateRep(newReps.rep, offBase_minus1, ll0); 680 + ZSTD_updateRep(newReps.rep, offBase, ll0); 839 681 return newReps; 840 682 } 841 683 ··· 843 685 /*-************************************* 844 686 * Match length counter 845 687 ***************************************/ 846 - static unsigned ZSTD_NbCommonBytes (size_t val) 847 - { 848 - if (MEM_isLittleEndian()) { 849 - if (MEM_64bits()) { 850 - # if (__GNUC__ >= 4) 851 - return (__builtin_ctzll((U64)val) >> 3); 852 - # else 853 - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 854 - 0, 3, 1, 3, 1, 4, 2, 7, 855 - 0, 2, 3, 6, 1, 5, 3, 5, 856 - 1, 3, 4, 4, 2, 5, 6, 7, 857 - 7, 0, 1, 2, 3, 3, 4, 6, 858 - 2, 6, 5, 5, 3, 4, 5, 6, 859 - 7, 1, 2, 4, 6, 4, 4, 5, 860 - 7, 2, 6, 5, 7, 6, 7, 7 }; 861 - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; 862 - # endif 863 - } else { /* 32 bits */ 864 - # if (__GNUC__ >= 3) 865 - return (__builtin_ctz((U32)val) >> 3); 866 - # else 867 - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 868 - 3, 2, 2, 1, 3, 2, 0, 1, 869 - 3, 3, 1, 2, 2, 2, 2, 0, 870 - 3, 1, 2, 0, 1, 0, 1, 1 }; 871 - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; 872 - # endif 873 - } 874 - } else { /* Big Endian CPU */ 875 - if (MEM_64bits()) { 876 - # if (__GNUC__ >= 4) 877 - return (__builtin_clzll(val) >> 3); 878 - # else 879 - unsigned r; 880 - const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits 
mode */ 881 - if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } 882 - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } 883 - r += (!val); 884 - return r; 885 - # endif 886 - } else { /* 32 bits */ 887 - # if (__GNUC__ >= 3) 888 - return (__builtin_clz((U32)val) >> 3); 889 - # else 890 - unsigned r; 891 - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } 892 - r += (!val); 893 - return r; 894 - # endif 895 - } } 896 - } 897 - 898 - 899 688 MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) 900 689 { 901 690 const BYTE* const pStart = pIn; ··· 876 771 size_t const matchLength = ZSTD_count(ip, match, vEnd); 877 772 if (match + matchLength != mEnd) return matchLength; 878 773 DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength); 879 - DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match); 880 - DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip); 774 + DEBUGLOG(7, "distance from match beginning to end dictionary = %i", (int)(mEnd - match)); 775 + DEBUGLOG(7, "distance from current pos to end buffer = %i", (int)(iEnd - ip)); 881 776 DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart); 882 777 DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd)); 883 778 return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd); ··· 888 783 * Hashes 889 784 ***************************************/ 890 785 static const U32 prime3bytes = 506832829U; 891 - static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; } 892 - MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ 786 + static U32 ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s) >> (32-h) ; } 787 + MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return 
ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */ 788 + MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); } 893 789 894 790 static const U32 prime4bytes = 2654435761U; 895 - static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } 896 - static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } 791 + static U32 ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; } 792 + static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); } 793 + static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); } 897 794 898 795 static const U64 prime5bytes = 889523592379ULL; 899 - static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } 900 - static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } 796 + static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; } 797 + static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); } 798 + static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); } 901 799 902 800 static const U64 prime6bytes = 227718039650203ULL; 903 - static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } 904 - static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } 801 + static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; } 802 + static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); } 803 + static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), 
h, s); } 905 804 906 805 static const U64 prime7bytes = 58295818150454627ULL; 907 - static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } 908 - static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } 806 + static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; } 807 + static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); } 808 + static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); } 909 809 910 810 static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; 911 - static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } 912 - static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } 811 + static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes) ^ s) >> (64-h)) ; } 812 + static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); } 813 + static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); } 814 + 913 815 914 816 MEM_STATIC FORCE_INLINE_ATTR 915 817 size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) 916 818 { 819 + /* Although some of these hashes do support hBits up to 64, some do not. 820 + * To be on the safe side, always avoid hBits > 32. */ 821 + assert(hBits <= 32); 822 + 917 823 switch(mls) 918 824 { 919 825 default: ··· 935 819 case 8: return ZSTD_hash8Ptr(p, hBits); 936 820 } 937 821 } 822 + 823 + MEM_STATIC FORCE_INLINE_ATTR 824 + size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt) { 825 + /* Although some of these hashes do support hBits up to 64, some do not. 826 + * To be on the safe side, always avoid hBits > 32. 
*/ 827 + assert(hBits <= 32); 828 + 829 + switch(mls) 830 + { 831 + default: 832 + case 4: return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt); 833 + case 5: return ZSTD_hash5PtrS(p, hBits, hashSalt); 834 + case 6: return ZSTD_hash6PtrS(p, hBits, hashSalt); 835 + case 7: return ZSTD_hash7PtrS(p, hBits, hashSalt); 836 + case 8: return ZSTD_hash8PtrS(p, hBits, hashSalt); 837 + } 838 + } 839 + 938 840 939 841 /* ZSTD_ipow() : 940 842 * Return base^exponent. ··· 1015 881 /*-************************************* 1016 882 * Round buffer management 1017 883 ***************************************/ 1018 - #if (ZSTD_WINDOWLOG_MAX_64 > 31) 1019 - # error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX" 1020 - #endif 1021 - /* Max current allowed */ 1022 - #define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) 884 + /* Max @current value allowed: 885 + * In 32-bit mode: we want to avoid crossing the 2 GB limit, 886 + * reducing risks of side effects in case of signed operations on indexes. 887 + * In 64-bit mode: we want to ensure that adding the maximum job size (512 MB) 888 + * doesn't overflow U32 index capacity (4 GB) */ 889 + #define ZSTD_CURRENT_MAX (MEM_64bits() ? 3500U MB : 2000U MB) 1023 890 /* Maximum chunk size before overflow correction needs to be called again */ 1024 891 #define ZSTD_CHUNKSIZE_MAX \ 1025 892 ( ((U32)-1) /* Maximum ending current index */ \ ··· 1060 925 * Inspects the provided matchState and figures out what dictMode should be 1061 926 * passed to the compressor. 1062 927 */ 1063 - MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) 928 + MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_MatchState_t *ms) 1064 929 { 1065 930 return ZSTD_window_hasExtDict(ms->window) ? 1066 931 ZSTD_extDict : ··· 1146 1011 * The least significant cycleLog bits of the indices must remain the same, 1147 1012 * which may be 0. Every index up to maxDist in the past must be valid. 
1148 1013 */ 1149 - MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, 1014 + MEM_STATIC 1015 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 1016 + U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, 1150 1017 U32 maxDist, void const* src) 1151 1018 { 1152 1019 /* preemptive overflow correction: ··· 1249 1112 const void* blockEnd, 1250 1113 U32 maxDist, 1251 1114 U32* loadedDictEndPtr, 1252 - const ZSTD_matchState_t** dictMatchStatePtr) 1115 + const ZSTD_MatchState_t** dictMatchStatePtr) 1253 1116 { 1254 1117 U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); 1255 1118 U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; ··· 1294 1157 const void* blockEnd, 1295 1158 U32 maxDist, 1296 1159 U32* loadedDictEndPtr, 1297 - const ZSTD_matchState_t** dictMatchStatePtr) 1160 + const ZSTD_MatchState_t** dictMatchStatePtr) 1298 1161 { 1299 1162 assert(loadedDictEndPtr != NULL); 1300 1163 assert(dictMatchStatePtr != NULL); ··· 1304 1167 (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); 1305 1168 assert(blockEndIdx >= loadedDictEnd); 1306 1169 1307 - if (blockEndIdx > loadedDictEnd + maxDist) { 1170 + if (blockEndIdx > loadedDictEnd + maxDist || loadedDictEnd != window->dictLimit) { 1308 1171 /* On reaching window size, dictionaries are invalidated. 1309 1172 * For simplification, if window size is reached anywhere within next block, 1310 1173 * the dictionary is invalidated for the full block. 1174 + * 1175 + * We also have to invalidate the dictionary if ZSTD_window_update() has detected 1176 + * non-contiguous segments, which means that loadedDictEnd != window->dictLimit. 1177 + * loadedDictEnd may be 0, if forceWindow is true, but in that case we never use 1178 + * dictMatchState, so setting it to NULL is not a problem. 
1311 1179 */ 1312 1180 DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); 1313 1181 *loadedDictEndPtr = 0; ··· 1341 1199 * forget about the extDict. Handles overlap of the prefix and extDict. 1342 1200 * Returns non-zero if the segment is contiguous. 1343 1201 */ 1344 - MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, 1345 - void const* src, size_t srcSize, 1346 - int forceNonContiguous) 1202 + MEM_STATIC 1203 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 1204 + U32 ZSTD_window_update(ZSTD_window_t* window, 1205 + const void* src, size_t srcSize, 1206 + int forceNonContiguous) 1347 1207 { 1348 1208 BYTE const* const ip = (BYTE const*)src; 1349 1209 U32 contiguous = 1; ··· 1372 1228 /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ 1373 1229 if ( (ip+srcSize > window->dictBase + window->lowLimit) 1374 1230 & (ip < window->dictBase + window->dictLimit)) { 1375 - ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase; 1376 - U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx; 1231 + size_t const highInputIdx = (size_t)((ip + srcSize) - window->dictBase); 1232 + U32 const lowLimitMax = (highInputIdx > (size_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx; 1233 + assert(highInputIdx < UINT_MAX); 1377 1234 window->lowLimit = lowLimitMax; 1378 1235 DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit); 1379 1236 } ··· 1384 1239 /* 1385 1240 * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix. 
1386 1241 */ 1387 - MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) 1242 + MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_MatchState_t* ms, U32 curr, unsigned windowLog) 1388 1243 { 1389 1244 U32 const maxDistance = 1U << windowLog; 1390 1245 U32 const lowestValid = ms->window.lowLimit; ··· 1401 1256 /* 1402 1257 * Returns the lowest allowed match index in the prefix. 1403 1258 */ 1404 - MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) 1259 + MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_MatchState_t* ms, U32 curr, unsigned windowLog) 1405 1260 { 1406 1261 U32 const maxDistance = 1U << windowLog; 1407 1262 U32 const lowestValid = ms->window.dictLimit; ··· 1414 1269 return matchLowest; 1415 1270 } 1416 1271 1272 + /* index_safety_check: 1273 + * intentional underflow : ensure repIndex isn't overlapping dict + prefix 1274 + * @return 1 if values are not overlapping, 1275 + * 0 otherwise */ 1276 + MEM_STATIC int ZSTD_index_overlap_check(const U32 prefixLowestIndex, const U32 repIndex) { 1277 + return ((U32)((prefixLowestIndex-1) - repIndex) >= 3); 1278 + } 1417 1279 1418 1280 1419 1281 /* debug functions */ ··· 1454 1302 1455 1303 #endif 1456 1304 1305 + /* Short Cache */ 1457 1306 1307 + /* Normally, zstd matchfinders follow this flow: 1308 + * 1. Compute hash at ip 1309 + * 2. Load index from hashTable[hash] 1310 + * 3. Check if *ip == *(base + index) 1311 + * In dictionary compression, loading *(base + index) is often an L2 or even L3 miss. 1312 + * 1313 + * Short cache is an optimization which allows us to avoid step 3 most of the time 1314 + * when the data doesn't actually match. With short cache, the flow becomes: 1315 + * 1. Compute (hash, currentTag) at ip. currentTag is an 8-bit independent hash at ip. 1316 + * 2. Load (index, matchTag) from hashTable[hash]. See ZSTD_writeTaggedIndex to understand how this works. 1317 + * 3. 
Only if currentTag == matchTag, check *ip == *(base + index). Otherwise, continue. 1318 + * 1319 + * Currently, short cache is only implemented in CDict hashtables. Thus, its use is limited to 1320 + * dictMatchState matchfinders. 1321 + */ 1322 + #define ZSTD_SHORT_CACHE_TAG_BITS 8 1323 + #define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1) 1324 + 1325 + /* Helper function for ZSTD_fillHashTable and ZSTD_fillDoubleHashTable. 1326 + * Unpacks hashAndTag into (hash, tag), then packs (index, tag) into hashTable[hash]. */ 1327 + MEM_STATIC void ZSTD_writeTaggedIndex(U32* const hashTable, size_t hashAndTag, U32 index) { 1328 + size_t const hash = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS; 1329 + U32 const tag = (U32)(hashAndTag & ZSTD_SHORT_CACHE_TAG_MASK); 1330 + assert(index >> (32 - ZSTD_SHORT_CACHE_TAG_BITS) == 0); 1331 + hashTable[hash] = (index << ZSTD_SHORT_CACHE_TAG_BITS) | tag; 1332 + } 1333 + 1334 + /* Helper function for short cache matchfinders. 1335 + * Unpacks tag1 and tag2 from lower bits of packedTag1 and packedTag2, then checks if the tags match. 
*/ 1336 + MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) { 1337 + U32 const tag1 = packedTag1 & ZSTD_SHORT_CACHE_TAG_MASK; 1338 + U32 const tag2 = packedTag2 & ZSTD_SHORT_CACHE_TAG_MASK; 1339 + return tag1 == tag2; 1340 + } 1458 1341 1459 1342 /* =============================================================== 1460 1343 * Shared internal declarations ··· 1506 1319 1507 1320 void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); 1508 1321 1322 + typedef struct { 1323 + U32 idx; /* Index in array of ZSTD_Sequence */ 1324 + U32 posInSequence; /* Position within sequence at idx */ 1325 + size_t posInSrc; /* Number of bytes given by sequences provided so far */ 1326 + } ZSTD_SequencePosition; 1327 + 1328 + /* for benchmark */ 1329 + size_t ZSTD_convertBlockSequences(ZSTD_CCtx* cctx, 1330 + const ZSTD_Sequence* const inSeqs, size_t nbSequences, 1331 + int const repcodeResolution); 1332 + 1333 + typedef struct { 1334 + size_t nbSequences; 1335 + size_t blockSize; 1336 + size_t litSize; 1337 + } BlockSummary; 1338 + 1339 + BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs); 1340 + 1509 1341 /* ============================================================== 1510 1342 * Private declarations 1511 1343 * These prototypes shall only be called from within lib/compress ··· 1536 1330 * Note: srcSizeHint == 0 means 0! 1537 1331 */ 1538 1332 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( 1539 - const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); 1333 + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode); 1540 1334 1541 1335 /*! ZSTD_initCStream_internal() : 1542 1336 * Private use only. Init streaming operation. 
··· 1548 1342 const ZSTD_CDict* cdict, 1549 1343 const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); 1550 1344 1551 - void ZSTD_resetSeqStore(seqStore_t* ssPtr); 1345 + void ZSTD_resetSeqStore(SeqStore_t* ssPtr); 1552 1346 1553 1347 /*! ZSTD_getCParamsFromCDict() : 1554 1348 * as the name implies */ ··· 1587 1381 * This cannot be used when long range matching is enabled. 1588 1382 * Zstd will use these sequences, and pass the literals to a secondary block 1589 1383 * compressor. 1590 - * @return : An error code on failure. 1591 1384 * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory 1592 1385 * access and data corruption. 1593 1386 */ 1594 - size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq); 1387 + void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq); 1595 1388 1596 1389 /* ZSTD_cycleLog() : 1597 1390 * condition for correct operation : hashLog > 1 */ ··· 1600 1395 * Trace the end of a compression call. 1601 1396 */ 1602 1397 void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize); 1398 + 1399 + /* Returns 1 if an external sequence producer is registered, otherwise returns 0. */ 1400 + MEM_STATIC int ZSTD_hasExtSeqProd(const ZSTD_CCtx_params* params) { 1401 + return params->extSeqProdFunc != NULL; 1402 + } 1403 + 1404 + /* =============================================================== 1405 + * Deprecated definitions that are still used internally to avoid 1406 + * deprecation warnings. These functions are exactly equivalent to 1407 + * their public variants, but avoid the deprecation warnings. 
1408 + * =============================================================== */ 1409 + 1410 + size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); 1411 + 1412 + size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx, 1413 + void* dst, size_t dstCapacity, 1414 + const void* src, size_t srcSize); 1415 + 1416 + size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx, 1417 + void* dst, size_t dstCapacity, 1418 + const void* src, size_t srcSize); 1419 + 1420 + size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); 1421 + 1603 1422 1604 1423 #endif /* ZSTD_COMPRESS_H */
+117 -40
lib/zstd/compress/zstd_compress_literals.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 14 13 ***************************************/ 15 14 #include "zstd_compress_literals.h" 16 15 16 + 17 + /* ************************************************************** 18 + * Debug Traces 19 + ****************************************************************/ 20 + #if DEBUGLEVEL >= 2 21 + 22 + static size_t showHexa(const void* src, size_t srcSize) 23 + { 24 + const BYTE* const ip = (const BYTE*)src; 25 + size_t u; 26 + for (u=0; u<srcSize; u++) { 27 + RAWLOG(5, " %02X", ip[u]); (void)ip; 28 + } 29 + RAWLOG(5, " \n"); 30 + return srcSize; 31 + } 32 + 33 + #endif 34 + 35 + 36 + /* ************************************************************** 37 + * Literals compression - special cases 38 + ****************************************************************/ 17 39 size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) 18 40 { 19 41 BYTE* const ostart = (BYTE*)dst; 20 42 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); 43 + 44 + DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity); 21 45 22 46 RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, ""); 23 47 ··· 62 36 } 63 37 64 38 ZSTD_memcpy(ostart + flSize, src, srcSize); 65 - DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize)); 39 + DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize)); 66 40 return srcSize + flSize; 41 + } 42 + 43 + static int allBytesIdentical(const void* src, size_t srcSize) 44 + { 45 + assert(srcSize >= 1); 46 + assert(src != NULL); 47 + { const BYTE b = ((const BYTE*)src)[0]; 48 + size_t p; 49 + for (p=1; p<srcSize; p++) { 50 + if (((const 
BYTE*)src)[p] != b) return 0; 51 + } 52 + return 1; 53 + } 67 54 } 68 55 69 56 size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) ··· 84 45 BYTE* const ostart = (BYTE*)dst; 85 46 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); 86 47 87 - (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ 48 + assert(dstCapacity >= 4); (void)dstCapacity; 49 + assert(allBytesIdentical(src, srcSize)); 88 50 89 51 switch(flSize) 90 52 { ··· 103 63 } 104 64 105 65 ostart[flSize] = *(const BYTE*)src; 106 - DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1); 66 + DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1); 107 67 return flSize+1; 108 68 } 109 69 110 - size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, 111 - ZSTD_hufCTables_t* nextHuf, 112 - ZSTD_strategy strategy, int disableLiteralCompression, 113 - void* dst, size_t dstCapacity, 114 - const void* src, size_t srcSize, 115 - void* entropyWorkspace, size_t entropyWorkspaceSize, 116 - const int bmi2, 117 - unsigned suspectUncompressible) 70 + /* ZSTD_minLiteralsToCompress() : 71 + * returns minimal amount of literals 72 + * for literal compression to even be attempted. 73 + * Minimum is made tighter as compression strategy increases. 74 + */ 75 + static size_t 76 + ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat) 118 77 { 119 - size_t const minGain = ZSTD_minGain(srcSize, strategy); 78 + assert((int)strategy >= 0); 79 + assert((int)strategy <= 9); 80 + /* btultra2 : min 8 bytes; 81 + * then 2x larger for each successive compression strategy 82 + * max threshold 64 bytes */ 83 + { int const shift = MIN(9-(int)strategy, 3); 84 + size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 
6 : (size_t)8 << shift; 85 + DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc); 86 + return mintc; 87 + } 88 + } 89 + 90 + size_t ZSTD_compressLiterals ( 91 + void* dst, size_t dstCapacity, 92 + const void* src, size_t srcSize, 93 + void* entropyWorkspace, size_t entropyWorkspaceSize, 94 + const ZSTD_hufCTables_t* prevHuf, 95 + ZSTD_hufCTables_t* nextHuf, 96 + ZSTD_strategy strategy, 97 + int disableLiteralCompression, 98 + int suspectUncompressible, 99 + int bmi2) 100 + { 120 101 size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); 121 102 BYTE* const ostart = (BYTE*)dst; 122 103 U32 singleStream = srcSize < 256; 123 - symbolEncodingType_e hType = set_compressed; 104 + SymbolEncodingType_e hType = set_compressed; 124 105 size_t cLitSize; 125 106 126 - DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)", 127 - disableLiteralCompression, (U32)srcSize); 107 + DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)", 108 + disableLiteralCompression, (U32)srcSize, dstCapacity); 109 + 110 + DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize)); 128 111 129 112 /* Prepare nextEntropy assuming reusing the existing table */ 130 113 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); ··· 155 92 if (disableLiteralCompression) 156 93 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); 157 94 158 - /* small ? don't even attempt compression (speed opt) */ 159 - # define COMPRESS_LITERALS_SIZE_MIN 63 160 - { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 
6 : COMPRESS_LITERALS_SIZE_MIN; 161 - if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); 162 - } 95 + /* if too small, don't even attempt compression (speed opt) */ 96 + if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode)) 97 + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); 163 98 164 99 RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); 165 100 { HUF_repeat repeat = prevHuf->repeatMode; 166 - int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; 101 + int const flags = 0 102 + | (bmi2 ? HUF_flags_bmi2 : 0) 103 + | (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0) 104 + | (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0) 105 + | (suspectUncompressible ? HUF_flags_suspectUncompressible : 0); 106 + 107 + typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int); 108 + huf_compress_f huf_compress; 167 109 if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; 168 - cLitSize = singleStream ? 169 - HUF_compress1X_repeat( 170 - ostart+lhSize, dstCapacity-lhSize, src, srcSize, 171 - HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, 172 - (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible) : 173 - HUF_compress4X_repeat( 174 - ostart+lhSize, dstCapacity-lhSize, src, srcSize, 175 - HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, 176 - (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible); 110 + huf_compress = singleStream ? 
HUF_compress1X_repeat : HUF_compress4X_repeat; 111 + cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize, 112 + src, srcSize, 113 + HUF_SYMBOLVALUE_MAX, LitHufLog, 114 + entropyWorkspace, entropyWorkspaceSize, 115 + (HUF_CElt*)nextHuf->CTable, 116 + &repeat, flags); 117 + DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize); 177 118 if (repeat != HUF_repeat_none) { 178 119 /* reused the existing table */ 179 - DEBUGLOG(5, "Reusing previous huffman table"); 120 + DEBUGLOG(5, "reusing statistics from previous huffman block"); 180 121 hType = set_repeat; 181 122 } 182 123 } 183 124 184 - if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) { 185 - ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); 186 - return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); 187 - } 125 + { size_t const minGain = ZSTD_minGain(srcSize, strategy); 126 + if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) { 127 + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); 128 + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); 129 + } } 188 130 if (cLitSize==1) { 189 - ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); 190 - return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); 191 - } 131 + /* A return value of 1 signals that the alphabet consists of a single symbol. 132 + * However, in some rare circumstances, it could be the compressed size (a single byte). 133 + * For that outcome to have a chance to happen, it's necessary that `srcSize < 8`. 134 + * (it's also necessary to not generate statistics). 135 + * Therefore, in such a case, actively check that all bytes are identical. 
*/ 136 + if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) { 137 + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); 138 + return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); 139 + } } 192 140 193 141 if (hType == set_compressed) { 194 142 /* using a newly constructed table */ ··· 210 136 switch(lhSize) 211 137 { 212 138 case 3: /* 2 - 2 - 10 - 10 */ 213 - { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); 139 + if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); 140 + { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); 214 141 MEM_writeLE24(ostart, lhc); 215 142 break; 216 143 } 217 144 case 4: /* 2 - 2 - 14 - 14 */ 145 + assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); 218 146 { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); 219 147 MEM_writeLE32(ostart, lhc); 220 148 break; 221 149 } 222 150 case 5: /* 2 - 2 - 18 - 18 */ 151 + assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); 223 152 { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); 224 153 MEM_writeLE32(ostart, lhc); 225 154 ostart[4] = (BYTE)(cLitSize >> 10);
+17 -8
lib/zstd/compress/zstd_compress_literals.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 17 16 18 17 size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize); 19 18 19 + /* ZSTD_compressRleLiteralsBlock() : 20 + * Conditions : 21 + * - All bytes in @src are identical 22 + * - dstCapacity >= 4 */ 20 23 size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); 21 24 22 - /* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */ 23 - size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, 24 - ZSTD_hufCTables_t* nextHuf, 25 - ZSTD_strategy strategy, int disableLiteralCompression, 26 - void* dst, size_t dstCapacity, 25 + /* ZSTD_compressLiterals(): 26 + * @entropyWorkspace: must be aligned on 4-bytes boundaries 27 + * @entropyWorkspaceSize : must be >= HUF_WORKSPACE_SIZE 28 + * @suspectUncompressible: sampling checks, to potentially skip huffman coding 29 + */ 30 + size_t ZSTD_compressLiterals (void* dst, size_t dstCapacity, 27 31 const void* src, size_t srcSize, 28 32 void* entropyWorkspace, size_t entropyWorkspaceSize, 29 - const int bmi2, 30 - unsigned suspectUncompressible); 33 + const ZSTD_hufCTables_t* prevHuf, 34 + ZSTD_hufCTables_t* nextHuf, 35 + ZSTD_strategy strategy, int disableLiteralCompression, 36 + int suspectUncompressible, 37 + int bmi2); 31 38 32 39 #endif /* ZSTD_COMPRESS_LITERALS_H */
+11 -10
lib/zstd/compress/zstd_compress_sequences.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 59 58 { 60 59 /* Heuristic: This should cover most blocks <= 16K and 61 60 * start to fade out after 16K to about 32K depending on 62 - * comprssibility. 61 + * compressibility. 63 62 */ 64 63 return nbSeq >= 2048; 65 64 } ··· 154 153 return cost >> 8; 155 154 } 156 155 157 - symbolEncodingType_e 156 + SymbolEncodingType_e 158 157 ZSTD_selectEncodingType( 159 158 FSE_repeat* repeatMode, unsigned const* count, unsigned const max, 160 159 size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, 161 160 FSE_CTable const* prevCTable, 162 161 short const* defaultNorm, U32 defaultNormLog, 163 - ZSTD_defaultPolicy_e const isDefaultAllowed, 162 + ZSTD_DefaultPolicy_e const isDefaultAllowed, 164 163 ZSTD_strategy const strategy) 165 164 { 166 165 ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); 167 166 if (mostFrequent == nbSeq) { 168 167 *repeatMode = FSE_repeat_none; 169 168 if (isDefaultAllowed && nbSeq <= 2) { 170 - /* Prefer set_basic over set_rle when there are 2 or less symbols, 169 + /* Prefer set_basic over set_rle when there are 2 or fewer symbols, 171 170 * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. 172 171 * If basic encoding isn't possible, always choose RLE. 
173 172 */ ··· 242 241 243 242 size_t 244 243 ZSTD_buildCTable(void* dst, size_t dstCapacity, 245 - FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, 244 + FSE_CTable* nextCTable, U32 FSELog, SymbolEncodingType_e type, 246 245 unsigned* count, U32 max, 247 246 const BYTE* codeTable, size_t nbSeq, 248 247 const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, ··· 294 293 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, 295 294 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, 296 295 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, 297 - seqDef const* sequences, size_t nbSeq, int longOffsets) 296 + SeqDef const* sequences, size_t nbSeq, int longOffsets) 298 297 { 299 298 BIT_CStream_t blockStream; 300 299 FSE_CState_t stateMatchLength; ··· 388 387 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, 389 388 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, 390 389 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, 391 - seqDef const* sequences, size_t nbSeq, int longOffsets) 390 + SeqDef const* sequences, size_t nbSeq, int longOffsets) 392 391 { 393 392 return ZSTD_encodeSequences_body(dst, dstCapacity, 394 393 CTable_MatchLength, mlCodeTable, ··· 406 405 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, 407 406 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, 408 407 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, 409 - seqDef const* sequences, size_t nbSeq, int longOffsets) 408 + SeqDef const* sequences, size_t nbSeq, int longOffsets) 410 409 { 411 410 return ZSTD_encodeSequences_body(dst, dstCapacity, 412 411 CTable_MatchLength, mlCodeTable, ··· 422 421 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, 423 422 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, 424 423 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, 425 - seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) 424 + SeqDef 
const* sequences, size_t nbSeq, int longOffsets, int bmi2) 426 425 { 427 426 DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); 428 427 #if DYNAMIC_BMI2
+9 -7
lib/zstd/compress/zstd_compress_sequences.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 12 11 #ifndef ZSTD_COMPRESS_SEQUENCES_H 13 12 #define ZSTD_COMPRESS_SEQUENCES_H 14 13 14 + #include "zstd_compress_internal.h" /* SeqDef */ 15 15 #include "../common/fse.h" /* FSE_repeat, FSE_CTable */ 16 - #include "../common/zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */ 16 + #include "../common/zstd_internal.h" /* SymbolEncodingType_e, ZSTD_strategy */ 17 17 18 18 typedef enum { 19 19 ZSTD_defaultDisallowed = 0, 20 20 ZSTD_defaultAllowed = 1 21 - } ZSTD_defaultPolicy_e; 21 + } ZSTD_DefaultPolicy_e; 22 22 23 - symbolEncodingType_e 23 + SymbolEncodingType_e 24 24 ZSTD_selectEncodingType( 25 25 FSE_repeat* repeatMode, unsigned const* count, unsigned const max, 26 26 size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, 27 27 FSE_CTable const* prevCTable, 28 28 short const* defaultNorm, U32 defaultNormLog, 29 - ZSTD_defaultPolicy_e const isDefaultAllowed, 29 + ZSTD_DefaultPolicy_e const isDefaultAllowed, 30 30 ZSTD_strategy const strategy); 31 31 32 32 size_t 33 33 ZSTD_buildCTable(void* dst, size_t dstCapacity, 34 - FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, 34 + FSE_CTable* nextCTable, U32 FSELog, SymbolEncodingType_e type, 35 35 unsigned* count, U32 max, 36 36 const BYTE* codeTable, size_t nbSeq, 37 37 const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, ··· 44 42 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, 45 43 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, 46 44 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, 47 - seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); 45 + SeqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); 48 46 49 47 size_t 
ZSTD_fseBitCost( 50 48 FSE_CTable const* ctable,
+253 -137
lib/zstd/compress/zstd_compress_superblock.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 37 36 * If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block 38 37 * and the following sub-blocks' literals sections will be Treeless_Literals_Block. 39 38 * @return : compressed size of literals section of a sub-block 40 - * Or 0 if it unable to compress. 39 + * Or 0 if unable to compress. 41 40 * Or error code */ 42 - static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, 43 - const ZSTD_hufCTablesMetadata_t* hufMetadata, 44 - const BYTE* literals, size_t litSize, 45 - void* dst, size_t dstSize, 46 - const int bmi2, int writeEntropy, int* entropyWritten) 41 + static size_t 42 + ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, 43 + const ZSTD_hufCTablesMetadata_t* hufMetadata, 44 + const BYTE* literals, size_t litSize, 45 + void* dst, size_t dstSize, 46 + const int bmi2, int writeEntropy, int* entropyWritten) 47 47 { 48 48 size_t const header = writeEntropy ? 200 : 0; 49 49 size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header)); ··· 52 50 BYTE* const oend = ostart + dstSize; 53 51 BYTE* op = ostart + lhSize; 54 52 U32 const singleStream = lhSize == 3; 55 - symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat; 53 + SymbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat; 56 54 size_t cLitSize = 0; 57 - 58 - (void)bmi2; /* TODO bmi2... */ 59 55 60 56 DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy); 61 57 ··· 76 76 DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize); 77 77 } 78 78 79 - /* TODO bmi2 */ 80 - { const size_t cSize = singleStream ? 
HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable) 81 - : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable); 79 + { int const flags = bmi2 ? HUF_flags_bmi2 : 0; 80 + const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags) 81 + : HUF_compress4X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags); 82 82 op += cSize; 83 83 cLitSize += cSize; 84 84 if (cSize == 0 || ERR_isError(cSize)) { ··· 103 103 switch(lhSize) 104 104 { 105 105 case 3: /* 2 - 2 - 10 - 10 */ 106 - { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14); 106 + { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14); 107 107 MEM_writeLE24(ostart, lhc); 108 108 break; 109 109 } ··· 123 123 } 124 124 *entropyWritten = 1; 125 125 DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart)); 126 - return op-ostart; 126 + return (size_t)(op-ostart); 127 127 } 128 128 129 - static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) { 130 - const seqDef* const sstart = sequences; 131 - const seqDef* const send = sequences + nbSeq; 132 - const seqDef* sp = sstart; 129 + static size_t 130 + ZSTD_seqDecompressedSize(SeqStore_t const* seqStore, 131 + const SeqDef* sequences, size_t nbSeqs, 132 + size_t litSize, int lastSubBlock) 133 + { 133 134 size_t matchLengthSum = 0; 134 135 size_t litLengthSum = 0; 135 - (void)(litLengthSum); /* suppress unused variable warning on some environments */ 136 - while (send-sp > 0) { 137 - ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp); 136 + size_t n; 137 + for (n=0; n<nbSeqs; n++) { 138 + const ZSTD_SequenceLength seqLen = ZSTD_getSequenceLength(seqStore, sequences+n); 138 139 litLengthSum += seqLen.litLength; 139 140 matchLengthSum += seqLen.matchLength; 140 - 
sp++; 141 141 } 142 - assert(litLengthSum <= litSize); 143 - if (!lastSequence) { 142 + DEBUGLOG(5, "ZSTD_seqDecompressedSize: %u sequences from %p: %u literals + %u matchlength", 143 + (unsigned)nbSeqs, (const void*)sequences, 144 + (unsigned)litLengthSum, (unsigned)matchLengthSum); 145 + if (!lastSubBlock) 144 146 assert(litLengthSum == litSize); 145 - } 147 + else 148 + assert(litLengthSum <= litSize); 149 + (void)litLengthSum; 146 150 return matchLengthSum + litSize; 147 151 } 148 152 ··· 160 156 * @return : compressed size of sequences section of a sub-block 161 157 * Or 0 if it is unable to compress 162 158 * Or error code. */ 163 - static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables, 164 - const ZSTD_fseCTablesMetadata_t* fseMetadata, 165 - const seqDef* sequences, size_t nbSeq, 166 - const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, 167 - const ZSTD_CCtx_params* cctxParams, 168 - void* dst, size_t dstCapacity, 169 - const int bmi2, int writeEntropy, int* entropyWritten) 159 + static size_t 160 + ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables, 161 + const ZSTD_fseCTablesMetadata_t* fseMetadata, 162 + const SeqDef* sequences, size_t nbSeq, 163 + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, 164 + const ZSTD_CCtx_params* cctxParams, 165 + void* dst, size_t dstCapacity, 166 + const int bmi2, int writeEntropy, int* entropyWritten) 170 167 { 171 168 const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; 172 169 BYTE* const ostart = (BYTE*)dst; ··· 181 176 /* Sequences Header */ 182 177 RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, 183 178 dstSize_tooSmall, ""); 184 - if (nbSeq < 0x7F) 179 + if (nbSeq < 128) 185 180 *op++ = (BYTE)nbSeq; 186 181 else if (nbSeq < LONGNBSEQ) 187 182 op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; 188 183 else 189 184 op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; 190 185 if (nbSeq==0) 
{ 191 - return op - ostart; 186 + return (size_t)(op - ostart); 192 187 } 193 188 194 189 /* seqHead : flags for FSE encoding type */ ··· 210 205 } 211 206 212 207 { size_t const bitstreamSize = ZSTD_encodeSequences( 213 - op, oend - op, 208 + op, (size_t)(oend - op), 214 209 fseTables->matchlengthCTable, mlCode, 215 210 fseTables->offcodeCTable, ofCode, 216 211 fseTables->litlengthCTable, llCode, ··· 254 249 #endif 255 250 256 251 *entropyWritten = 1; 257 - return op - ostart; 252 + return (size_t)(op - ostart); 258 253 } 259 254 260 255 /* ZSTD_compressSubBlock() : ··· 263 258 * Or 0 if it failed to compress. */ 264 259 static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy, 265 260 const ZSTD_entropyCTablesMetadata_t* entropyMetadata, 266 - const seqDef* sequences, size_t nbSeq, 261 + const SeqDef* sequences, size_t nbSeq, 267 262 const BYTE* literals, size_t litSize, 268 263 const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, 269 264 const ZSTD_CCtx_params* cctxParams, ··· 280 275 litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock); 281 276 { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable, 282 277 &entropyMetadata->hufMetadata, literals, litSize, 283 - op, oend-op, bmi2, writeLitEntropy, litEntropyWritten); 278 + op, (size_t)(oend-op), 279 + bmi2, writeLitEntropy, litEntropyWritten); 284 280 FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed"); 285 281 if (cLitSize == 0) return 0; 286 282 op += cLitSize; ··· 291 285 sequences, nbSeq, 292 286 llCode, mlCode, ofCode, 293 287 cctxParams, 294 - op, oend-op, 288 + op, (size_t)(oend-op), 295 289 bmi2, writeSeqEntropy, seqEntropyWritten); 296 290 FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed"); 297 291 if (cSeqSize == 0) return 0; 298 292 op += cSeqSize; 299 293 } 300 294 /* Write block header */ 301 - { size_t cSize = (op-ostart)-ZSTD_blockHeaderSize; 295 + { size_t cSize = (size_t)(op-ostart) - 
ZSTD_blockHeaderSize; 302 296 U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); 303 297 MEM_writeLE24(ostart, cBlockHeader24); 304 298 } 305 - return op-ostart; 299 + return (size_t)(op-ostart); 306 300 } 307 301 308 302 static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, ··· 328 322 return 0; 329 323 } 330 324 331 - static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, 325 + static size_t ZSTD_estimateSubBlockSize_symbolType(SymbolEncodingType_e type, 332 326 const BYTE* codeTable, unsigned maxCode, 333 327 size_t nbSeq, const FSE_CTable* fseCTable, 334 328 const U8* additionalBits, ··· 391 385 return cSeqSizeEstimate + sequencesSectionHeaderSize; 392 386 } 393 387 394 - static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, 388 + typedef struct { 389 + size_t estLitSize; 390 + size_t estBlockSize; 391 + } EstimatedBlockSize; 392 + static EstimatedBlockSize ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, 395 393 const BYTE* ofCodeTable, 396 394 const BYTE* llCodeTable, 397 395 const BYTE* mlCodeTable, ··· 403 393 const ZSTD_entropyCTables_t* entropy, 404 394 const ZSTD_entropyCTablesMetadata_t* entropyMetadata, 405 395 void* workspace, size_t wkspSize, 406 - int writeLitEntropy, int writeSeqEntropy) { 407 - size_t cSizeEstimate = 0; 408 - cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize, 409 - &entropy->huf, &entropyMetadata->hufMetadata, 410 - workspace, wkspSize, writeLitEntropy); 411 - cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, 396 + int writeLitEntropy, int writeSeqEntropy) 397 + { 398 + EstimatedBlockSize ebs; 399 + ebs.estLitSize = ZSTD_estimateSubBlockSize_literal(literals, litSize, 400 + &entropy->huf, &entropyMetadata->hufMetadata, 401 + workspace, wkspSize, writeLitEntropy); 402 + ebs.estBlockSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, 
llCodeTable, mlCodeTable, 412 403 nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, 413 404 workspace, wkspSize, writeSeqEntropy); 414 - return cSizeEstimate + ZSTD_blockHeaderSize; 405 + ebs.estBlockSize += ebs.estLitSize + ZSTD_blockHeaderSize; 406 + return ebs; 415 407 } 416 408 417 409 static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata) ··· 427 415 return 0; 428 416 } 429 417 418 + static size_t countLiterals(SeqStore_t const* seqStore, const SeqDef* sp, size_t seqCount) 419 + { 420 + size_t n, total = 0; 421 + assert(sp != NULL); 422 + for (n=0; n<seqCount; n++) { 423 + total += ZSTD_getSequenceLength(seqStore, sp+n).litLength; 424 + } 425 + DEBUGLOG(6, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, (const void*)sp, total); 426 + return total; 427 + } 428 + 429 + #define BYTESCALE 256 430 + 431 + static size_t sizeBlockSequences(const SeqDef* sp, size_t nbSeqs, 432 + size_t targetBudget, size_t avgLitCost, size_t avgSeqCost, 433 + int firstSubBlock) 434 + { 435 + size_t n, budget = 0, inSize=0; 436 + /* entropy headers */ 437 + size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */ 438 + assert(firstSubBlock==0 || firstSubBlock==1); 439 + budget += headerSize; 440 + 441 + /* first sequence => at least one sequence*/ 442 + budget += sp[0].litLength * avgLitCost + avgSeqCost; 443 + if (budget > targetBudget) return 1; 444 + inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH); 445 + 446 + /* loop over sequences */ 447 + for (n=1; n<nbSeqs; n++) { 448 + size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost; 449 + budget += currentCost; 450 + inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH); 451 + /* stop when sub-block budget is reached */ 452 + if ( (budget > targetBudget) 453 + /* though continue to expand until the sub-block is deemed compressible */ 454 + && (budget < inSize * BYTESCALE) ) 455 + break; 456 + } 457 + 458 + return n; 459 + } 460 + 430 461 /* 
ZSTD_compressSubBlock_multi() : 431 462 * Breaks super-block into multiple sub-blocks and compresses them. 432 - * Entropy will be written to the first block. 433 - * The following blocks will use repeat mode to compress. 434 - * All sub-blocks are compressed blocks (no raw or rle blocks). 435 - * @return : compressed size of the super block (which is multiple ZSTD blocks) 436 - * Or 0 if it failed to compress. */ 437 - static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, 463 + * Entropy will be written into the first block. 464 + * The following blocks use repeat_mode to compress. 465 + * Sub-blocks are all compressed, except the last one when beneficial. 466 + * @return : compressed size of the super block (which features multiple ZSTD blocks) 467 + * or 0 if it failed to compress. */ 468 + static size_t ZSTD_compressSubBlock_multi(const SeqStore_t* seqStorePtr, 438 469 const ZSTD_compressedBlockState_t* prevCBlock, 439 470 ZSTD_compressedBlockState_t* nextCBlock, 440 471 const ZSTD_entropyCTablesMetadata_t* entropyMetadata, ··· 487 432 const int bmi2, U32 lastBlock, 488 433 void* workspace, size_t wkspSize) 489 434 { 490 - const seqDef* const sstart = seqStorePtr->sequencesStart; 491 - const seqDef* const send = seqStorePtr->sequences; 492 - const seqDef* sp = sstart; 435 + const SeqDef* const sstart = seqStorePtr->sequencesStart; 436 + const SeqDef* const send = seqStorePtr->sequences; 437 + const SeqDef* sp = sstart; /* tracks progresses within seqStorePtr->sequences */ 438 + size_t const nbSeqs = (size_t)(send - sstart); 493 439 const BYTE* const lstart = seqStorePtr->litStart; 494 440 const BYTE* const lend = seqStorePtr->lit; 495 441 const BYTE* lp = lstart; 442 + size_t const nbLiterals = (size_t)(lend - lstart); 496 443 BYTE const* ip = (BYTE const*)src; 497 444 BYTE const* const iend = ip + srcSize; 498 445 BYTE* const ostart = (BYTE*)dst; ··· 503 446 const BYTE* llCodePtr = seqStorePtr->llCode; 504 447 const BYTE* mlCodePtr = 
seqStorePtr->mlCode; 505 448 const BYTE* ofCodePtr = seqStorePtr->ofCode; 506 - size_t targetCBlockSize = cctxParams->targetCBlockSize; 507 - size_t litSize, seqCount; 508 - int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed; 449 + size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */ 450 + size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize); 451 + int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed); 509 452 int writeSeqEntropy = 1; 510 - int lastSequence = 0; 511 453 512 - DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)", 513 - (unsigned)(lend-lp), (unsigned)(send-sstart)); 454 + DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)", 455 + (unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart)); 514 456 515 - litSize = 0; 516 - seqCount = 0; 517 - do { 518 - size_t cBlockSizeEstimate = 0; 519 - if (sstart == send) { 520 - lastSequence = 1; 521 - } else { 522 - const seqDef* const sequence = sp + seqCount; 523 - lastSequence = sequence == send - 1; 524 - litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength; 525 - seqCount++; 457 + /* let's start by a general estimation for the full block */ 458 + if (nbSeqs > 0) { 459 + EstimatedBlockSize const ebs = 460 + ZSTD_estimateSubBlockSize(lp, nbLiterals, 461 + ofCodePtr, llCodePtr, mlCodePtr, nbSeqs, 462 + &nextCBlock->entropy, entropyMetadata, 463 + workspace, wkspSize, 464 + writeLitEntropy, writeSeqEntropy); 465 + /* quick estimation */ 466 + size_t const avgLitCost = nbLiterals ? 
(ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE; 467 + size_t const avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs; 468 + const size_t nbSubBlocks = MAX((ebs.estBlockSize + (targetCBlockSize/2)) / targetCBlockSize, 1); 469 + size_t n, avgBlockBudget, blockBudgetSupp=0; 470 + avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks; 471 + DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes", 472 + (unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE, 473 + (unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE); 474 + /* simplification: if estimates states that the full superblock doesn't compress, just bail out immediately 475 + * this will result in the production of a single uncompressed block covering @srcSize.*/ 476 + if (ebs.estBlockSize > srcSize) return 0; 477 + 478 + /* compress and write sub-blocks */ 479 + assert(nbSubBlocks>0); 480 + for (n=0; n < nbSubBlocks-1; n++) { 481 + /* determine nb of sequences for current sub-block + nbLiterals from next sequence */ 482 + size_t const seqCount = sizeBlockSequences(sp, (size_t)(send-sp), 483 + avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0); 484 + /* if reached last sequence : break to last sub-block (simplification) */ 485 + assert(seqCount <= (size_t)(send-sp)); 486 + if (sp + seqCount == send) break; 487 + assert(seqCount > 0); 488 + /* compress sub-block */ 489 + { int litEntropyWritten = 0; 490 + int seqEntropyWritten = 0; 491 + size_t litSize = countLiterals(seqStorePtr, sp, seqCount); 492 + const size_t decompressedSize = 493 + ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 0); 494 + size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, 495 + sp, seqCount, 496 + lp, litSize, 497 + llCodePtr, mlCodePtr, ofCodePtr, 498 + cctxParams, 499 + op, 
(size_t)(oend-op), 500 + bmi2, writeLitEntropy, writeSeqEntropy, 501 + &litEntropyWritten, &seqEntropyWritten, 502 + 0); 503 + FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); 504 + 505 + /* check compressibility, update state components */ 506 + if (cSize > 0 && cSize < decompressedSize) { 507 + DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes", 508 + (unsigned)decompressedSize, (unsigned)cSize); 509 + assert(ip + decompressedSize <= iend); 510 + ip += decompressedSize; 511 + lp += litSize; 512 + op += cSize; 513 + llCodePtr += seqCount; 514 + mlCodePtr += seqCount; 515 + ofCodePtr += seqCount; 516 + /* Entropy only needs to be written once */ 517 + if (litEntropyWritten) { 518 + writeLitEntropy = 0; 519 + } 520 + if (seqEntropyWritten) { 521 + writeSeqEntropy = 0; 522 + } 523 + sp += seqCount; 524 + blockBudgetSupp = 0; 525 + } } 526 + /* otherwise : do not compress yet, coalesce current sub-block with following one */ 526 527 } 527 - if (lastSequence) { 528 - assert(lp <= lend); 529 - assert(litSize <= (size_t)(lend - lp)); 530 - litSize = (size_t)(lend - lp); 531 - } 532 - /* I think there is an optimization opportunity here. 533 - * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful 534 - * since it recalculates estimate from scratch. 535 - * For example, it would recount literal distribution and symbol codes every time. 
536 - */ 537 - cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount, 538 - &nextCBlock->entropy, entropyMetadata, 539 - workspace, wkspSize, writeLitEntropy, writeSeqEntropy); 540 - if (cBlockSizeEstimate > targetCBlockSize || lastSequence) { 541 - int litEntropyWritten = 0; 542 - int seqEntropyWritten = 0; 543 - const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence); 544 - const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, 545 - sp, seqCount, 546 - lp, litSize, 547 - llCodePtr, mlCodePtr, ofCodePtr, 548 - cctxParams, 549 - op, oend-op, 550 - bmi2, writeLitEntropy, writeSeqEntropy, 551 - &litEntropyWritten, &seqEntropyWritten, 552 - lastBlock && lastSequence); 553 - FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); 554 - if (cSize > 0 && cSize < decompressedSize) { 555 - DEBUGLOG(5, "Committed the sub-block"); 556 - assert(ip + decompressedSize <= iend); 557 - ip += decompressedSize; 558 - sp += seqCount; 559 - lp += litSize; 560 - op += cSize; 561 - llCodePtr += seqCount; 562 - mlCodePtr += seqCount; 563 - ofCodePtr += seqCount; 564 - litSize = 0; 565 - seqCount = 0; 566 - /* Entropy only needs to be written once */ 567 - if (litEntropyWritten) { 568 - writeLitEntropy = 0; 569 - } 570 - if (seqEntropyWritten) { 571 - writeSeqEntropy = 0; 572 - } 528 + } /* if (nbSeqs > 0) */ 529 + 530 + /* write last block */ 531 + DEBUGLOG(5, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp)); 532 + { int litEntropyWritten = 0; 533 + int seqEntropyWritten = 0; 534 + size_t litSize = (size_t)(lend - lp); 535 + size_t seqCount = (size_t)(send - sp); 536 + const size_t decompressedSize = 537 + ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1); 538 + size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, 539 + sp, seqCount, 540 + lp, litSize, 541 + llCodePtr, mlCodePtr, ofCodePtr, 542 
+ cctxParams, 543 + op, (size_t)(oend-op), 544 + bmi2, writeLitEntropy, writeSeqEntropy, 545 + &litEntropyWritten, &seqEntropyWritten, 546 + lastBlock); 547 + FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); 548 + 549 + /* update pointers, the nb of literals borrowed from next sequence must be preserved */ 550 + if (cSize > 0 && cSize < decompressedSize) { 551 + DEBUGLOG(5, "Last sub-block compressed %u bytes => %u bytes", 552 + (unsigned)decompressedSize, (unsigned)cSize); 553 + assert(ip + decompressedSize <= iend); 554 + ip += decompressedSize; 555 + lp += litSize; 556 + op += cSize; 557 + llCodePtr += seqCount; 558 + mlCodePtr += seqCount; 559 + ofCodePtr += seqCount; 560 + /* Entropy only needs to be written once */ 561 + if (litEntropyWritten) { 562 + writeLitEntropy = 0; 573 563 } 564 + if (seqEntropyWritten) { 565 + writeSeqEntropy = 0; 566 + } 567 + sp += seqCount; 574 568 } 575 - } while (!lastSequence); 569 + } 570 + 571 + 576 572 if (writeLitEntropy) { 577 - DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten"); 573 + DEBUGLOG(5, "Literal entropy tables were never written"); 578 574 ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf)); 579 575 } 580 576 if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) { 581 577 /* If we haven't written our entropy tables, then we've violated our contract and 582 578 * must emit an uncompressed block. 
583 579 */ 584 - DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten"); 580 + DEBUGLOG(5, "Sequence entropy tables were never written => cancel, emit an uncompressed block"); 585 581 return 0; 586 582 } 583 + 587 584 if (ip < iend) { 588 - size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock); 589 - DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip)); 585 + /* some data left : last part of the block sent uncompressed */ 586 + size_t const rSize = (size_t)((iend - ip)); 587 + size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock); 588 + DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize)); 590 589 FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); 591 590 assert(cSize != 0); 592 591 op += cSize; 593 592 /* We have to regenerate the repcodes because we've skipped some sequences */ 594 593 if (sp < send) { 595 - seqDef const* seq; 596 - repcodes_t rep; 594 + const SeqDef* seq; 595 + Repcodes_t rep; 597 596 ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep)); 598 597 for (seq = sstart; seq < sp; ++seq) { 599 - ZSTD_updateRep(rep.rep, seq->offBase - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); 598 + ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); 600 599 } 601 600 ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep)); 602 601 } 603 602 } 604 - DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed"); 605 - return op-ostart; 603 + 604 + DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed all subBlocks: total compressed size = %u", 605 + (unsigned)(op-ostart)); 606 + return (size_t)(op-ostart); 606 607 } 607 608 608 609 size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, 609 610 void* dst, size_t dstCapacity, 610 - void const* src, size_t srcSize, 611 - unsigned lastBlock) { 611 + const void* src, size_t srcSize, 612 + unsigned lastBlock) 613 + { 612 614 
ZSTD_entropyCTablesMetadata_t entropyMetadata; 613 615 614 616 FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore, ··· 675 559 &zc->blockState.nextCBlock->entropy, 676 560 &zc->appliedParams, 677 561 &entropyMetadata, 678 - zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); 562 + zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */), ""); 679 563 680 564 return ZSTD_compressSubBlock_multi(&zc->seqStore, 681 565 zc->blockState.prevCBlock, ··· 685 569 dst, dstCapacity, 686 570 src, srcSize, 687 571 zc->bmi2, lastBlock, 688 - zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */); 572 + zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */); 689 573 }
+2 -1
lib/zstd/compress/zstd_compress_superblock.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the
+142 -80
lib/zstd/compress/zstd_cwksp.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 15 14 /*-************************************* 16 15 * Dependencies 17 16 ***************************************/ 17 + #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */ 18 18 #include "../common/zstd_internal.h" 19 - 19 + #include "../common/portability_macros.h" 20 + #include "../common/compiler.h" /* ZS2_isPower2 */ 20 21 21 22 /*-************************************* 22 23 * Constants ··· 44 41 ***************************************/ 45 42 typedef enum { 46 43 ZSTD_cwksp_alloc_objects, 47 - ZSTD_cwksp_alloc_buffers, 48 - ZSTD_cwksp_alloc_aligned 44 + ZSTD_cwksp_alloc_aligned_init_once, 45 + ZSTD_cwksp_alloc_aligned, 46 + ZSTD_cwksp_alloc_buffers 49 47 } ZSTD_cwksp_alloc_phase_e; 50 48 51 49 /* ··· 99 95 * 100 96 * Workspace Layout: 101 97 * 102 - * [ ... workspace ... ] 103 - * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers] 98 + * [ ... workspace ... ] 99 + * [objects][tables ->] free space [<- buffers][<- aligned][<- init once] 104 100 * 105 101 * The various objects that live in the workspace are divided into the 106 102 * following categories, and are allocated separately: ··· 124 120 * uint32_t arrays, all of whose values are between 0 and (nextSrc - base). 125 121 * Their sizes depend on the cparams. These tables are 64-byte aligned. 126 122 * 127 - * - Aligned: these buffers are used for various purposes that require 4 byte 128 - * alignment, but don't require any initialization before they're used. These 129 - * buffers are each aligned to 64 bytes. 123 + * - Init once: these buffers require to be initialized at least once before 124 + * use. 
They should be used when we want to skip memory initialization 125 + * while not triggering memory checkers (like Valgrind) when reading from 126 + * from this memory without writing to it first. 127 + * These buffers should be used carefully as they might contain data 128 + * from previous compressions. 129 + * Buffers are aligned to 64 bytes. 130 + * 131 + * - Aligned: these buffers don't require any initialization before they're 132 + * used. The user of the buffer should make sure they write into a buffer 133 + * location before reading from it. 134 + * Buffers are aligned to 64 bytes. 130 135 * 131 136 * - Buffers: these buffers are used for various purposes that don't require 132 137 * any alignment or initialization before they're used. This means they can ··· 147 134 * correctly packed into the workspace buffer. That order is: 148 135 * 149 136 * 1. Objects 150 - * 2. Buffers 151 - * 3. Aligned/Tables 137 + * 2. Init once / Tables 138 + * 3. Aligned / Tables 139 + * 4. Buffers / Tables 152 140 * 153 141 * Attempts to reserve objects of different types out of order will fail. 154 142 */ ··· 161 147 void* tableEnd; 162 148 void* tableValidEnd; 163 149 void* allocStart; 150 + void* initOnceStart; 164 151 165 152 BYTE allocFailed; 166 153 int workspaceOversizedDuration; ··· 174 159 ***************************************/ 175 160 176 161 MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws); 162 + MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws); 177 163 178 164 MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) { 179 165 (void)ws; ··· 184 168 assert(ws->tableEnd <= ws->allocStart); 185 169 assert(ws->tableValidEnd <= ws->allocStart); 186 170 assert(ws->allocStart <= ws->workspaceEnd); 171 + assert(ws->initOnceStart <= ZSTD_cwksp_initialAllocStart(ws)); 172 + assert(ws->workspace <= ws->initOnceStart); 187 173 } 188 174 189 175 /* 190 176 * Align must be a power of 2. 
191 177 */ 192 - MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { 178 + MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t align) { 193 179 size_t const mask = align - 1; 194 - assert((align & mask) == 0); 180 + assert(ZSTD_isPower2(align)); 195 181 return (size + mask) & ~mask; 196 182 } 197 183 ··· 207 189 * to figure out how much space you need for the matchState tables. Everything 208 190 * else is though. 209 191 * 210 - * Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size(). 192 + * Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned64_alloc_size(). 211 193 */ 212 194 MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { 213 195 if (size == 0) ··· 215 197 return size; 216 198 } 217 199 200 + MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size, size_t alignment) { 201 + return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, alignment)); 202 + } 203 + 218 204 /* 219 205 * Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes. 220 206 * Used to determine the number of bytes required for a given "aligned". 221 207 */ 222 - MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) { 223 - return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES)); 208 + MEM_STATIC size_t ZSTD_cwksp_aligned64_alloc_size(size_t size) { 209 + return ZSTD_cwksp_aligned_alloc_size(size, ZSTD_CWKSP_ALIGNMENT_BYTES); 224 210 } 225 211 226 212 /* ··· 232 210 * for internal purposes (currently only alignment). 233 211 */ 234 212 MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) { 235 - /* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes 236 - * to align the beginning of tables section, as well as another n_2=[0, 63] bytes 237 - * to align the beginning of the aligned section. 238 - * 239 - * n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and 240 - * aligneds being sized in multiples of 64 bytes. 
213 + /* For alignment, the wksp will always allocate an additional 2*ZSTD_CWKSP_ALIGNMENT_BYTES 214 + * bytes to align the beginning of tables section and end of buffers; 241 215 */ 242 - size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES; 216 + size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES * 2; 243 217 return slackSpace; 244 218 } 245 219 ··· 247 229 MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignBytes) { 248 230 size_t const alignBytesMask = alignBytes - 1; 249 231 size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask; 250 - assert((alignBytes & alignBytesMask) == 0); 251 - assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES); 232 + assert(ZSTD_isPower2(alignBytes)); 233 + assert(bytes < alignBytes); 252 234 return bytes; 235 + } 236 + 237 + /* 238 + * Returns the initial value for allocStart which is used to determine the position from 239 + * which we can allocate from the end of the workspace. 240 + */ 241 + MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws) 242 + { 243 + char* endPtr = (char*)ws->workspaceEnd; 244 + assert(ZSTD_isPower2(ZSTD_CWKSP_ALIGNMENT_BYTES)); 245 + endPtr = endPtr - ((size_t)endPtr % ZSTD_CWKSP_ALIGNMENT_BYTES); 246 + return (void*)endPtr; 253 247 } 254 248 255 249 /* ··· 276 246 { 277 247 void* const alloc = (BYTE*)ws->allocStart - bytes; 278 248 void* const bottom = ws->tableEnd; 279 - DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining", 249 + DEBUGLOG(5, "cwksp: reserving [0x%p]:%zd bytes; %zd bytes remaining", 280 250 alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); 281 251 ZSTD_cwksp_assert_internal_consistency(ws); 282 252 assert(alloc >= bottom); ··· 304 274 { 305 275 assert(phase >= ws->phase); 306 276 if (phase > ws->phase) { 307 - /* Going from allocating objects to allocating buffers */ 308 - if (ws->phase < ZSTD_cwksp_alloc_buffers && 309 - phase >= ZSTD_cwksp_alloc_buffers) { 277 + /* Going from allocating objects to allocating 
initOnce / tables */ 278 + if (ws->phase < ZSTD_cwksp_alloc_aligned_init_once && 279 + phase >= ZSTD_cwksp_alloc_aligned_init_once) { 310 280 ws->tableValidEnd = ws->objectEnd; 311 - } 281 + ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws); 312 282 313 - /* Going from allocating buffers to allocating aligneds/tables */ 314 - if (ws->phase < ZSTD_cwksp_alloc_aligned && 315 - phase >= ZSTD_cwksp_alloc_aligned) { 316 - { /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */ 317 - size_t const bytesToAlign = 318 - ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES); 319 - DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign); 320 - ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */ 321 - RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign), 322 - memory_allocation, "aligned phase - alignment initial allocation failed!"); 323 - } 324 283 { /* Align the start of the tables to 64 bytes. 
Use [0, 63] bytes */ 325 - void* const alloc = ws->objectEnd; 284 + void *const alloc = ws->objectEnd; 326 285 size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES); 327 - void* const objectEnd = (BYTE*)alloc + bytesToAlign; 286 + void *const objectEnd = (BYTE *) alloc + bytesToAlign; 328 287 DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign); 329 288 RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation, 330 289 "table phase - alignment initial allocation failed!"); ··· 321 302 ws->tableEnd = objectEnd; /* table area starts being empty */ 322 303 if (ws->tableValidEnd < ws->tableEnd) { 323 304 ws->tableValidEnd = ws->tableEnd; 324 - } } } 305 + } 306 + } 307 + } 325 308 ws->phase = phase; 326 309 ZSTD_cwksp_assert_internal_consistency(ws); 327 310 } ··· 335 314 */ 336 315 MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) 337 316 { 338 - return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); 317 + return (ptr != NULL) && (ws->workspace <= ptr) && (ptr < ws->workspaceEnd); 339 318 } 340 319 341 320 /* ··· 366 345 367 346 /* 368 347 * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes). 348 + * This memory has been initialized at least once in the past. 349 + * This doesn't mean it has been initialized this time, and it might contain data from previous 350 + * operations. 351 + * The main usage is for algorithms that might need read access into uninitialized memory. 352 + * The algorithm must maintain safety under these conditions and must make sure it doesn't 353 + * leak any of the past data (directly or in side channels). 
369 354 */ 370 - MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) 355 + MEM_STATIC void* ZSTD_cwksp_reserve_aligned_init_once(ZSTD_cwksp* ws, size_t bytes) 371 356 { 372 - void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES), 373 - ZSTD_cwksp_alloc_aligned); 374 - assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0); 357 + size_t const alignedBytes = ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES); 358 + void* ptr = ZSTD_cwksp_reserve_internal(ws, alignedBytes, ZSTD_cwksp_alloc_aligned_init_once); 359 + assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0); 360 + if(ptr && ptr < ws->initOnceStart) { 361 + /* We assume the memory following the current allocation is either: 362 + * 1. Not usable as initOnce memory (end of workspace) 363 + * 2. Another initOnce buffer that has been allocated before (and so was previously memset) 364 + * 3. An ASAN redzone, in which case we don't want to write on it 365 + * For these reasons it should be fine to not explicitly zero every byte up to ws->initOnceStart. 366 + * Note that we assume here that MSAN and ASAN cannot run in the same time. */ 367 + ZSTD_memset(ptr, 0, MIN((size_t)((U8*)ws->initOnceStart - (U8*)ptr), alignedBytes)); 368 + ws->initOnceStart = ptr; 369 + } 370 + return ptr; 371 + } 372 + 373 + /* 374 + * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes). 375 + */ 376 + MEM_STATIC void* ZSTD_cwksp_reserve_aligned64(ZSTD_cwksp* ws, size_t bytes) 377 + { 378 + void* const ptr = ZSTD_cwksp_reserve_internal(ws, 379 + ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES), 380 + ZSTD_cwksp_alloc_aligned); 381 + assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0); 375 382 return ptr; 376 383 } 377 384 378 385 /* 379 386 * Aligned on 64 bytes. 
These buffers have the special property that 380 - * their values remain constrained, allowing us to re-use them without 387 + * their values remain constrained, allowing us to reuse them without 381 388 * memset()-ing them. 382 389 */ 383 390 MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) 384 391 { 385 - const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned; 392 + const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned_init_once; 386 393 void* alloc; 387 394 void* end; 388 395 void* top; 389 396 390 - if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) { 391 - return NULL; 397 + /* We can only start allocating tables after we are done reserving space for objects at the 398 + * start of the workspace */ 399 + if(ws->phase < phase) { 400 + if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) { 401 + return NULL; 402 + } 392 403 } 393 404 alloc = ws->tableEnd; 394 405 end = (BYTE *)alloc + bytes; ··· 440 387 441 388 442 389 assert((bytes & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0); 443 - assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0); 390 + assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0); 444 391 return alloc; 445 392 } 446 393 ··· 474 421 475 422 return alloc; 476 423 } 424 + /* 425 + * with alignment control 426 + * Note : should happen only once, at workspace first initialization 427 + */ 428 + MEM_STATIC void* ZSTD_cwksp_reserve_object_aligned(ZSTD_cwksp* ws, size_t byteSize, size_t alignment) 429 + { 430 + size_t const mask = alignment - 1; 431 + size_t const surplus = (alignment > sizeof(void*)) ? 
alignment - sizeof(void*) : 0; 432 + void* const start = ZSTD_cwksp_reserve_object(ws, byteSize + surplus); 433 + if (start == NULL) return NULL; 434 + if (surplus == 0) return start; 435 + assert(ZSTD_isPower2(alignment)); 436 + return (void*)(((size_t)start + surplus) & ~mask); 437 + } 477 438 478 439 MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) 479 440 { ··· 518 451 assert(ws->tableValidEnd >= ws->objectEnd); 519 452 assert(ws->tableValidEnd <= ws->allocStart); 520 453 if (ws->tableValidEnd < ws->tableEnd) { 521 - ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); 454 + ZSTD_memset(ws->tableValidEnd, 0, (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd)); 522 455 } 523 456 ZSTD_cwksp_mark_tables_clean(ws); 524 457 } ··· 527 460 * Invalidates table allocations. 528 461 * All other allocations remain valid. 529 462 */ 530 - MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) { 463 + MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) 464 + { 531 465 DEBUGLOG(4, "cwksp: clearing tables!"); 532 466 533 467 ··· 546 478 547 479 548 480 ws->tableEnd = ws->objectEnd; 549 - ws->allocStart = ws->workspaceEnd; 481 + ws->allocStart = ZSTD_cwksp_initialAllocStart(ws); 550 482 ws->allocFailed = 0; 551 - if (ws->phase > ZSTD_cwksp_alloc_buffers) { 552 - ws->phase = ZSTD_cwksp_alloc_buffers; 483 + if (ws->phase > ZSTD_cwksp_alloc_aligned_init_once) { 484 + ws->phase = ZSTD_cwksp_alloc_aligned_init_once; 553 485 } 554 486 ZSTD_cwksp_assert_internal_consistency(ws); 487 + } 488 + 489 + MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { 490 + return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); 491 + } 492 + 493 + MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) { 494 + return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace) 495 + + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart); 555 496 } 556 497 557 498 /* ··· 575 498 ws->workspaceEnd = (BYTE*)start + size; 576 499 
ws->objectEnd = ws->workspace; 577 500 ws->tableValidEnd = ws->objectEnd; 501 + ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws); 578 502 ws->phase = ZSTD_cwksp_alloc_objects; 579 503 ws->isStatic = isStatic; 580 504 ZSTD_cwksp_clear(ws); ··· 607 529 ZSTD_memset(src, 0, sizeof(ZSTD_cwksp)); 608 530 } 609 531 610 - MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { 611 - return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); 612 - } 613 - 614 - MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) { 615 - return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace) 616 - + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart); 617 - } 618 - 619 532 MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { 620 533 return ws->allocFailed; 621 534 } ··· 619 550 * Returns if the estimated space needed for a wksp is within an acceptable limit of the 620 551 * actual amount of space used. 621 552 */ 622 - MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws, 623 - size_t const estimatedSpace, int resizedWorkspace) { 624 - if (resizedWorkspace) { 625 - /* Resized/newly allocated wksp should have exact bounds */ 626 - return ZSTD_cwksp_used(ws) == estimatedSpace; 627 - } else { 628 - /* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes 629 - * than estimatedSpace. See the comments in zstd_cwksp.h for details. 
630 - */ 631 - return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63); 632 - } 553 + MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp *const ws, size_t const estimatedSpace) { 554 + /* We have an alignment space between objects and tables between tables and buffers, so we can have up to twice 555 + * the alignment bytes difference between estimation and actual usage */ 556 + return (estimatedSpace - ZSTD_cwksp_slack_space_required()) <= ZSTD_cwksp_used(ws) && 557 + ZSTD_cwksp_used(ws) <= estimatedSpace; 633 558 } 634 559 635 560 ··· 653 590 ws->workspaceOversizedDuration = 0; 654 591 } 655 592 } 656 - 657 593 658 594 #endif /* ZSTD_CWKSP_H */
+164 -81
lib/zstd/compress/zstd_double_fast.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 12 11 #include "zstd_compress_internal.h" 13 12 #include "zstd_double_fast.h" 14 13 14 + #ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR 15 15 16 - void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, 16 + static 17 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 18 + void ZSTD_fillDoubleHashTableForCDict(ZSTD_MatchState_t* ms, 19 + void const* end, ZSTD_dictTableLoadMethod_e dtlm) 20 + { 21 + const ZSTD_compressionParameters* const cParams = &ms->cParams; 22 + U32* const hashLarge = ms->hashTable; 23 + U32 const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; 24 + U32 const mls = cParams->minMatch; 25 + U32* const hashSmall = ms->chainTable; 26 + U32 const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS; 27 + const BYTE* const base = ms->window.base; 28 + const BYTE* ip = base + ms->nextToUpdate; 29 + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; 30 + const U32 fastHashFillStep = 3; 31 + 32 + /* Always insert every fastHashFillStep position into the hash tables. 33 + * Insert the other positions into the large hash table if their entry 34 + * is empty. 
35 + */ 36 + for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { 37 + U32 const curr = (U32)(ip - base); 38 + U32 i; 39 + for (i = 0; i < fastHashFillStep; ++i) { 40 + size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls); 41 + size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8); 42 + if (i == 0) { 43 + ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i); 44 + } 45 + if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) { 46 + ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i); 47 + } 48 + /* Only load extra positions for ZSTD_dtlm_full */ 49 + if (dtlm == ZSTD_dtlm_fast) 50 + break; 51 + } } 52 + } 53 + 54 + static 55 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 56 + void ZSTD_fillDoubleHashTableForCCtx(ZSTD_MatchState_t* ms, 17 57 void const* end, ZSTD_dictTableLoadMethod_e dtlm) 18 58 { 19 59 const ZSTD_compressionParameters* const cParams = &ms->cParams; ··· 85 43 /* Only load extra positions for ZSTD_dtlm_full */ 86 44 if (dtlm == ZSTD_dtlm_fast) 87 45 break; 88 - } } 46 + } } 47 + } 48 + 49 + void ZSTD_fillDoubleHashTable(ZSTD_MatchState_t* ms, 50 + const void* const end, 51 + ZSTD_dictTableLoadMethod_e dtlm, 52 + ZSTD_tableFillPurpose_e tfp) 53 + { 54 + if (tfp == ZSTD_tfp_forCDict) { 55 + ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm); 56 + } else { 57 + ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm); 58 + } 89 59 } 90 60 91 61 92 62 FORCE_INLINE_TEMPLATE 63 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 93 64 size_t ZSTD_compressBlock_doubleFast_noDict_generic( 94 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 65 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 95 66 void const* src, size_t srcSize, U32 const mls /* template */) 96 67 { 97 68 ZSTD_compressionParameters const* cParams = &ms->cParams; ··· 122 67 const BYTE* const iend = istart + srcSize; 123 68 const BYTE* const ilimit = iend - HASH_READ_SIZE; 124 69 U32 offset_1=rep[0], offset_2=rep[1]; 125 - U32 
offsetSaved = 0; 70 + U32 offsetSaved1 = 0, offsetSaved2 = 0; 126 71 127 72 size_t mLength; 128 73 U32 offset; ··· 143 88 const BYTE* matchl0; /* the long match for ip */ 144 89 const BYTE* matchs0; /* the short match for ip */ 145 90 const BYTE* matchl1; /* the long match for ip1 */ 91 + const BYTE* matchs0_safe; /* matchs0 or safe address */ 146 92 147 93 const BYTE* ip = istart; /* the current position */ 148 94 const BYTE* ip1; /* the next position */ 95 + /* Array of ~random data, should have low probability of matching data 96 + * we load from here instead of from tables, if matchl0/matchl1 are 97 + * invalid indices. Used to avoid unpredictable branches. */ 98 + const BYTE dummy[] = {0x12,0x34,0x56,0x78,0x9a,0xbc,0xde,0xf0,0xe2,0xb4}; 149 99 150 100 DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic"); 151 101 ··· 160 100 U32 const current = (U32)(ip - base); 161 101 U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); 162 102 U32 const maxRep = current - windowLow; 163 - if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; 164 - if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; 103 + if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0; 104 + if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0; 165 105 } 166 106 167 107 /* Outer Loop: one iteration per match found and stored */ ··· 191 131 if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { 192 132 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; 193 133 ip++; 194 - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); 134 + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); 195 135 goto _match_stored; 196 136 } 197 137 198 138 hl1 = ZSTD_hashPtr(ip1, hBitsL, 8); 199 139 200 - if (idxl0 > prefixLowestIndex) { 140 + /* idxl0 > prefixLowestIndex is a (somewhat) unpredictable branch. 
141 + * However expression below complies into conditional move. Since 142 + * match is unlikely and we only *branch* on idxl0 > prefixLowestIndex 143 + * if there is a match, all branches become predictable. */ 144 + { const BYTE* const matchl0_safe = ZSTD_selectAddr(idxl0, prefixLowestIndex, matchl0, &dummy[0]); 145 + 201 146 /* check prefix long match */ 202 - if (MEM_read64(matchl0) == MEM_read64(ip)) { 147 + if (MEM_read64(matchl0_safe) == MEM_read64(ip) && matchl0_safe == matchl0) { 203 148 mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8; 204 149 offset = (U32)(ip-matchl0); 205 150 while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */ 206 151 goto _match_found; 207 - } 208 - } 152 + } } 209 153 210 154 idxl1 = hashLong[hl1]; 211 155 matchl1 = base + idxl1; 212 156 213 - if (idxs0 > prefixLowestIndex) { 214 - /* check prefix short match */ 215 - if (MEM_read32(matchs0) == MEM_read32(ip)) { 216 - goto _search_next_long; 217 - } 157 + /* Same optimization as matchl0 above */ 158 + matchs0_safe = ZSTD_selectAddr(idxs0, prefixLowestIndex, matchs0, &dummy[0]); 159 + 160 + /* check prefix short match */ 161 + if(MEM_read32(matchs0_safe) == MEM_read32(ip) && matchs0_safe == matchs0) { 162 + goto _search_next_long; 218 163 } 219 164 220 165 if (ip1 >= nextStep) { ··· 240 175 } while (ip1 <= ilimit); 241 176 242 177 _cleanup: 178 + /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0), 179 + * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */ 180 + offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2; 181 + 243 182 /* save reps for next block */ 244 - rep[0] = offset_1 ? offset_1 : offsetSaved; 245 - rep[1] = offset_2 ? offset_2 : offsetSaved; 183 + rep[0] = offset_1 ? offset_1 : offsetSaved1; 184 + rep[1] = offset_2 ? 
offset_2 : offsetSaved2; 246 185 247 186 /* Return the last literals size */ 248 187 return (size_t)(iend - anchor); 249 188 250 189 _search_next_long: 251 190 252 - /* check prefix long +1 match */ 253 - if (idxl1 > prefixLowestIndex) { 254 - if (MEM_read64(matchl1) == MEM_read64(ip1)) { 191 + /* short match found: let's check for a longer one */ 192 + mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4; 193 + offset = (U32)(ip - matchs0); 194 + 195 + /* check long match at +1 position */ 196 + if ((idxl1 > prefixLowestIndex) && (MEM_read64(matchl1) == MEM_read64(ip1))) { 197 + size_t const l1len = ZSTD_count(ip1+8, matchl1+8, iend) + 8; 198 + if (l1len > mLength) { 199 + /* use the long match instead */ 255 200 ip = ip1; 256 - mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8; 201 + mLength = l1len; 257 202 offset = (U32)(ip-matchl1); 258 - while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */ 259 - goto _match_found; 203 + matchs0 = matchl1; 260 204 } 261 205 } 262 206 263 - /* if no long +1 match, explore the short match we found */ 264 - mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4; 265 - offset = (U32)(ip - matchs0); 266 - while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */ 207 + while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* complete backward */ 267 208 268 209 /* fall-through */ 269 210 ··· 288 217 hashLong[hl1] = (U32)(ip1 - base); 289 218 } 290 219 291 - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); 220 + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); 292 221 293 222 _match_stored: 294 223 /* match found */ ··· 314 243 U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ 315 244 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = 
(U32)(ip-base); 316 245 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); 317 - ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, rLength); 246 + ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength); 318 247 ip += rLength; 319 248 anchor = ip; 320 249 continue; /* faster when present ... (?) */ ··· 325 254 326 255 327 256 FORCE_INLINE_TEMPLATE 257 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 328 258 size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( 329 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 259 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 330 260 void const* src, size_t srcSize, 331 261 U32 const mls /* template */) 332 262 { ··· 347 275 const BYTE* const iend = istart + srcSize; 348 276 const BYTE* const ilimit = iend - HASH_READ_SIZE; 349 277 U32 offset_1=rep[0], offset_2=rep[1]; 350 - U32 offsetSaved = 0; 351 278 352 - const ZSTD_matchState_t* const dms = ms->dictMatchState; 279 + const ZSTD_MatchState_t* const dms = ms->dictMatchState; 353 280 const ZSTD_compressionParameters* const dictCParams = &dms->cParams; 354 281 const U32* const dictHashLong = dms->hashTable; 355 282 const U32* const dictHashSmall = dms->chainTable; ··· 357 286 const BYTE* const dictStart = dictBase + dictStartIndex; 358 287 const BYTE* const dictEnd = dms->window.nextSrc; 359 288 const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase); 360 - const U32 dictHBitsL = dictCParams->hashLog; 361 - const U32 dictHBitsS = dictCParams->chainLog; 289 + const U32 dictHBitsL = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; 290 + const U32 dictHBitsS = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS; 362 291 const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); 363 292 364 293 DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic"); 365 294 366 295 /* if a dictionary is attached, it must be within window range */ 367 296 assert(ms->window.dictLimit 
+ (1U << cParams->windowLog) >= endIndex); 297 + 298 + if (ms->prefetchCDictTables) { 299 + size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32); 300 + size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32); 301 + PREFETCH_AREA(dictHashLong, hashTableBytes); 302 + PREFETCH_AREA(dictHashSmall, chainTableBytes); 303 + } 368 304 369 305 /* init */ 370 306 ip += (dictAndPrefixLength == 0); ··· 387 309 U32 offset; 388 310 size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); 389 311 size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); 390 - size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8); 391 - size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls); 312 + size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8); 313 + size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls); 314 + U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS]; 315 + U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS]; 316 + int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL); 317 + int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS); 392 318 U32 const curr = (U32)(ip-base); 393 319 U32 const matchIndexL = hashLong[h2]; 394 320 U32 matchIndexS = hashSmall[h]; ··· 405 323 hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ 406 324 407 325 /* check repcode */ 408 - if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) 326 + if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex)) 409 327 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { 410 328 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; 411 329 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; 412 330 ip++; 413 - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); 331 + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); 414 332 goto _match_stored; 415 333 } 416 334 417 - if (matchIndexL > prefixLowestIndex) { 335 + if ((matchIndexL >= prefixLowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { 418 336 /* check prefix long match */ 419 - if (MEM_read64(matchLong) == MEM_read64(ip)) { 420 - mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; 421 - offset = (U32)(ip-matchLong); 422 - while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ 423 - goto _match_found; 424 - } 425 - } else { 337 + mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; 338 + offset = (U32)(ip-matchLong); 339 + while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ 340 + goto _match_found; 341 + } else if (dictTagsMatchL) { 426 342 /* check dictMatchState long match */ 427 - U32 const dictMatchIndexL = dictHashLong[dictHL]; 343 + U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS; 428 344 const BYTE* dictMatchL = dictBase + dictMatchIndexL; 429 345 assert(dictMatchL < dictEnd); 430 346 ··· 434 354 } } 435 355 436 356 if (matchIndexS > prefixLowestIndex) { 437 - /* check prefix short match */ 357 + /* short match candidate */ 438 358 if (MEM_read32(match) == MEM_read32(ip)) { 439 359 goto _search_next_long; 440 360 } 441 - } else { 361 + } else if (dictTagsMatchS) { 442 362 /* check dictMatchState short match */ 443 - U32 const dictMatchIndexS = dictHashSmall[dictHS]; 363 + U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS; 444 364 match = dictBase + dictMatchIndexS; 445 365 matchIndexS = 
dictMatchIndexS + dictIndexDelta; 446 366 ··· 455 375 continue; 456 376 457 377 _search_next_long: 458 - 459 378 { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); 460 - size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); 379 + size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8); 461 380 U32 const matchIndexL3 = hashLong[hl3]; 381 + U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS]; 382 + int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3); 462 383 const BYTE* matchL3 = base + matchIndexL3; 463 384 hashLong[hl3] = curr + 1; 464 385 465 386 /* check prefix long +1 match */ 466 - if (matchIndexL3 > prefixLowestIndex) { 467 - if (MEM_read64(matchL3) == MEM_read64(ip+1)) { 468 - mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; 469 - ip++; 470 - offset = (U32)(ip-matchL3); 471 - while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ 472 - goto _match_found; 473 - } 474 - } else { 387 + if ((matchIndexL3 >= prefixLowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1))) { 388 + mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; 389 + ip++; 390 + offset = (U32)(ip-matchL3); 391 + while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ 392 + goto _match_found; 393 + } else if (dictTagsMatchL3) { 475 394 /* check dict long +1 match */ 476 - U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; 395 + U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS; 477 396 const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; 478 397 assert(dictMatchL3 < dictEnd); 479 398 if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { ··· 498 419 offset_2 = offset_1; 499 420 offset_1 = offset; 500 421 501 - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); 422 + 
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); 502 423 503 424 _match_stored: 504 425 /* match found */ ··· 522 443 const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ? 523 444 dictBase + repIndex2 - dictIndexDelta : 524 445 base + repIndex2; 525 - if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) 446 + if ( (ZSTD_index_overlap_check(prefixLowestIndex, repIndex2)) 526 447 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { 527 448 const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; 528 449 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; 529 450 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ 530 - ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); 451 + ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); 531 452 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; 532 453 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; 533 454 ip += repLength2; ··· 540 461 } /* while (ip < ilimit) */ 541 462 542 463 /* save reps for next block */ 543 - rep[0] = offset_1 ? offset_1 : offsetSaved; 544 - rep[1] = offset_2 ? 
offset_2 : offsetSaved; 464 + rep[0] = offset_1; 465 + rep[1] = offset_2; 545 466 546 467 /* Return the last literals size */ 547 468 return (size_t)(iend - anchor); ··· 549 470 550 471 #define ZSTD_GEN_DFAST_FN(dictMode, mls) \ 551 472 static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \ 552 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ 473 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ 553 474 void const* src, size_t srcSize) \ 554 475 { \ 555 476 return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \ ··· 567 488 568 489 569 490 size_t ZSTD_compressBlock_doubleFast( 570 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 491 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 571 492 void const* src, size_t srcSize) 572 493 { 573 494 const U32 mls = ms->cParams.minMatch; ··· 587 508 588 509 589 510 size_t ZSTD_compressBlock_doubleFast_dictMatchState( 590 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 511 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 591 512 void const* src, size_t srcSize) 592 513 { 593 514 const U32 mls = ms->cParams.minMatch; ··· 606 527 } 607 528 608 529 609 - static size_t ZSTD_compressBlock_doubleFast_extDict_generic( 610 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 530 + static 531 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 532 + size_t ZSTD_compressBlock_doubleFast_extDict_generic( 533 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 611 534 void const* src, size_t srcSize, 612 535 U32 const mls /* template */) 613 536 { ··· 660 579 size_t mLength; 661 580 hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */ 662 581 663 - if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ 582 + if (((ZSTD_index_overlap_check(prefixStartIndex, 
repIndex)) 664 583 & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */ 665 584 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { 666 585 const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; 667 586 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; 668 587 ip++; 669 - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); 588 + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); 670 589 } else { 671 590 if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { 672 591 const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; ··· 677 596 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ 678 597 offset_2 = offset_1; 679 598 offset_1 = offset; 680 - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); 599 + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); 681 600 682 601 } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { 683 602 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); ··· 702 621 } 703 622 offset_2 = offset_1; 704 623 offset_1 = offset; 705 - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); 624 + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); 706 625 707 626 } else { 708 627 ip += ((ip-anchor) >> kSearchStrength) + 1; ··· 728 647 U32 const current2 = (U32)(ip-base); 729 648 U32 const repIndex2 = current2 - offset_2; 730 649 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? 
dictBase + repIndex2 : base + repIndex2; 731 - if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ 650 + if ( ((ZSTD_index_overlap_check(prefixStartIndex, repIndex2)) 732 651 & (offset_2 <= current2 - dictStartIndex)) 733 652 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { 734 653 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; 735 654 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; 736 655 U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ 737 - ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); 656 + ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); 738 657 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; 739 658 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; 740 659 ip += repLength2; ··· 758 677 ZSTD_GEN_DFAST_FN(extDict, 7) 759 678 760 679 size_t ZSTD_compressBlock_doubleFast_extDict( 761 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 680 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 762 681 void const* src, size_t srcSize) 763 682 { 764 683 U32 const mls = ms->cParams.minMatch; ··· 775 694 return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize); 776 695 } 777 696 } 697 + 698 + #endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
+19 -8
lib/zstd/compress/zstd_double_fast.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 12 11 #ifndef ZSTD_DOUBLE_FAST_H 13 12 #define ZSTD_DOUBLE_FAST_H 14 13 15 - 16 14 #include "../common/mem.h" /* U32 */ 17 15 #include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */ 18 16 19 - void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, 20 - void const* end, ZSTD_dictTableLoadMethod_e dtlm); 17 + #ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR 18 + 19 + void ZSTD_fillDoubleHashTable(ZSTD_MatchState_t* ms, 20 + void const* end, ZSTD_dictTableLoadMethod_e dtlm, 21 + ZSTD_tableFillPurpose_e tfp); 22 + 21 23 size_t ZSTD_compressBlock_doubleFast( 22 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 24 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 23 25 void const* src, size_t srcSize); 24 26 size_t ZSTD_compressBlock_doubleFast_dictMatchState( 25 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 27 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 26 28 void const* src, size_t srcSize); 27 29 size_t ZSTD_compressBlock_doubleFast_extDict( 28 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 30 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 29 31 void const* src, size_t srcSize); 30 32 31 - 33 + #define ZSTD_COMPRESSBLOCK_DOUBLEFAST ZSTD_compressBlock_doubleFast 34 + #define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE ZSTD_compressBlock_doubleFast_dictMatchState 35 + #define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT ZSTD_compressBlock_doubleFast_extDict 36 + #else 37 + #define ZSTD_COMPRESSBLOCK_DOUBLEFAST NULL 38 + #define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE NULL 39 + #define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT NULL 40 + #endif /* 
ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */ 32 41 33 42 #endif /* ZSTD_DOUBLE_FAST_H */
+722 -413
lib/zstd/compress/zstd_fast.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 12 11 #include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */ 13 12 #include "zstd_fast.h" 14 13 14 + static 15 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 16 + void ZSTD_fillHashTableForCDict(ZSTD_MatchState_t* ms, 17 + const void* const end, 18 + ZSTD_dictTableLoadMethod_e dtlm) 19 + { 20 + const ZSTD_compressionParameters* const cParams = &ms->cParams; 21 + U32* const hashTable = ms->hashTable; 22 + U32 const hBits = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; 23 + U32 const mls = cParams->minMatch; 24 + const BYTE* const base = ms->window.base; 25 + const BYTE* ip = base + ms->nextToUpdate; 26 + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; 27 + const U32 fastHashFillStep = 3; 15 28 16 - void ZSTD_fillHashTable(ZSTD_matchState_t* ms, 29 + /* Currently, we always use ZSTD_dtlm_full for filling CDict tables. 30 + * Feel free to remove this assert if there's a good reason! */ 31 + assert(dtlm == ZSTD_dtlm_full); 32 + 33 + /* Always insert every fastHashFillStep position into the hash table. 34 + * Insert the other positions if their hash entry is empty. 
35 + */ 36 + for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) { 37 + U32 const curr = (U32)(ip - base); 38 + { size_t const hashAndTag = ZSTD_hashPtr(ip, hBits, mls); 39 + ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr); } 40 + 41 + if (dtlm == ZSTD_dtlm_fast) continue; 42 + /* Only load extra positions for ZSTD_dtlm_full */ 43 + { U32 p; 44 + for (p = 1; p < fastHashFillStep; ++p) { 45 + size_t const hashAndTag = ZSTD_hashPtr(ip + p, hBits, mls); 46 + if (hashTable[hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) { /* not yet filled */ 47 + ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr + p); 48 + } } } } 49 + } 50 + 51 + static 52 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 53 + void ZSTD_fillHashTableForCCtx(ZSTD_MatchState_t* ms, 17 54 const void* const end, 18 55 ZSTD_dictTableLoadMethod_e dtlm) 19 56 { ··· 63 24 const BYTE* ip = base + ms->nextToUpdate; 64 25 const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; 65 26 const U32 fastHashFillStep = 3; 27 + 28 + /* Currently, we always use ZSTD_dtlm_fast for filling CCtx tables. 29 + * Feel free to remove this assert if there's a good reason! */ 30 + assert(dtlm == ZSTD_dtlm_fast); 66 31 67 32 /* Always insert every fastHashFillStep position into the hash table. 68 33 * Insert the other positions if their hash entry is empty. 
··· 83 40 if (hashTable[hash] == 0) { /* not yet filled */ 84 41 hashTable[hash] = curr + p; 85 42 } } } } 43 + } 44 + 45 + void ZSTD_fillHashTable(ZSTD_MatchState_t* ms, 46 + const void* const end, 47 + ZSTD_dictTableLoadMethod_e dtlm, 48 + ZSTD_tableFillPurpose_e tfp) 49 + { 50 + if (tfp == ZSTD_tfp_forCDict) { 51 + ZSTD_fillHashTableForCDict(ms, end, dtlm); 52 + } else { 53 + ZSTD_fillHashTableForCCtx(ms, end, dtlm); 54 + } 55 + } 56 + 57 + 58 + typedef int (*ZSTD_match4Found) (const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit); 59 + 60 + static int 61 + ZSTD_match4Found_cmov(const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit) 62 + { 63 + /* Array of ~random data, should have low probability of matching data. 64 + * Load from here if the index is invalid. 65 + * Used to avoid unpredictable branches. */ 66 + static const BYTE dummy[] = {0x12,0x34,0x56,0x78}; 67 + 68 + /* currentIdx >= lowLimit is a (somewhat) unpredictable branch. 69 + * However expression below compiles into conditional move. 70 + */ 71 + const BYTE* mvalAddr = ZSTD_selectAddr(matchIdx, idxLowLimit, matchAddress, dummy); 72 + /* Note: this used to be written as : return test1 && test2; 73 + * Unfortunately, once inlined, these tests become branches, 74 + * in which case it becomes critical that they are executed in the right order (test1 then test2). 75 + * So we have to write these tests in a specific manner to ensure their ordering. 
76 + */ 77 + if (MEM_read32(currentPtr) != MEM_read32(mvalAddr)) return 0; 78 + /* force ordering of these tests, which matters once the function is inlined, as they become branches */ 79 + __asm__(""); 80 + return matchIdx >= idxLowLimit; 81 + } 82 + 83 + static int 84 + ZSTD_match4Found_branch(const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit) 85 + { 86 + /* using a branch instead of a cmov, 87 + * because it's faster in scenarios where matchIdx >= idxLowLimit is generally true, 88 + * aka almost all candidates are within range */ 89 + U32 mval; 90 + if (matchIdx >= idxLowLimit) { 91 + mval = MEM_read32(matchAddress); 92 + } else { 93 + mval = MEM_read32(currentPtr) ^ 1; /* guaranteed to not match. */ 94 + } 95 + 96 + return (MEM_read32(currentPtr) == mval); 86 97 } 87 98 88 99 ··· 186 89 * 187 90 * This is also the work we do at the beginning to enter the loop initially. 188 91 */ 189 - FORCE_INLINE_TEMPLATE size_t 190 - ZSTD_compressBlock_fast_noDict_generic( 191 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 92 + FORCE_INLINE_TEMPLATE 93 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 94 + size_t ZSTD_compressBlock_fast_noDict_generic( 95 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 192 96 void const* src, size_t srcSize, 193 - U32 const mls, U32 const hasStep) 97 + U32 const mls, int useCmov) 194 98 { 195 99 const ZSTD_compressionParameters* const cParams = &ms->cParams; 196 100 U32* const hashTable = ms->hashTable; 197 101 U32 const hlog = cParams->hashLog; 198 - /* support stepSize of 0 */ 199 - size_t const stepSize = hasStep ? 
(cParams->targetLength + !(cParams->targetLength) + 1) : 2; 102 + size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; /* min 2 */ 200 103 const BYTE* const base = ms->window.base; 201 104 const BYTE* const istart = (const BYTE*)src; 202 105 const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); ··· 214 117 215 118 U32 rep_offset1 = rep[0]; 216 119 U32 rep_offset2 = rep[1]; 217 - U32 offsetSaved = 0; 120 + U32 offsetSaved1 = 0, offsetSaved2 = 0; 218 121 219 122 size_t hash0; /* hash for ip0 */ 220 123 size_t hash1; /* hash for ip1 */ 221 - U32 idx; /* match idx for ip0 */ 222 - U32 mval; /* src value at match idx */ 124 + U32 matchIdx; /* match idx for ip0 */ 223 125 224 126 U32 offcode; 225 127 const BYTE* match0; ··· 231 135 size_t step; 232 136 const BYTE* nextStep; 233 137 const size_t kStepIncr = (1 << (kSearchStrength - 1)); 138 + const ZSTD_match4Found matchFound = useCmov ? ZSTD_match4Found_cmov : ZSTD_match4Found_branch; 234 139 235 140 DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); 236 141 ip0 += (ip0 == prefixStart); 237 142 { U32 const curr = (U32)(ip0 - base); 238 143 U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); 239 144 U32 const maxRep = curr - windowLow; 240 - if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0; 241 - if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0; 145 + if (rep_offset2 > maxRep) offsetSaved2 = rep_offset2, rep_offset2 = 0; 146 + if (rep_offset1 > maxRep) offsetSaved1 = rep_offset1, rep_offset1 = 0; 147 + } 148 + 149 + /* start each op */ 150 + _start: /* Requires: ip0 */ 151 + 152 + step = stepSize; 153 + nextStep = ip0 + kStepIncr; 154 + 155 + /* calculate positions, ip0 - anchor == 0, so we skip step calc */ 156 + ip1 = ip0 + 1; 157 + ip2 = ip0 + step; 158 + ip3 = ip2 + 1; 159 + 160 + if (ip3 >= ilimit) { 161 + goto _cleanup; 162 + } 163 + 164 + hash0 = ZSTD_hashPtr(ip0, hlog, mls); 165 + hash1 = ZSTD_hashPtr(ip1, hlog, mls); 
166 + 167 + matchIdx = hashTable[hash0]; 168 + 169 + do { 170 + /* load repcode match for ip[2]*/ 171 + const U32 rval = MEM_read32(ip2 - rep_offset1); 172 + 173 + /* write back hash table entry */ 174 + current0 = (U32)(ip0 - base); 175 + hashTable[hash0] = current0; 176 + 177 + /* check repcode at ip[2] */ 178 + if ((MEM_read32(ip2) == rval) & (rep_offset1 > 0)) { 179 + ip0 = ip2; 180 + match0 = ip0 - rep_offset1; 181 + mLength = ip0[-1] == match0[-1]; 182 + ip0 -= mLength; 183 + match0 -= mLength; 184 + offcode = REPCODE1_TO_OFFBASE; 185 + mLength += 4; 186 + 187 + /* Write next hash table entry: it's already calculated. 188 + * This write is known to be safe because ip1 is before the 189 + * repcode (ip2). */ 190 + hashTable[hash1] = (U32)(ip1 - base); 191 + 192 + goto _match; 193 + } 194 + 195 + if (matchFound(ip0, base + matchIdx, matchIdx, prefixStartIndex)) { 196 + /* Write next hash table entry (it's already calculated). 197 + * This write is known to be safe because the ip1 == ip0 + 1, 198 + * so searching will resume after ip1 */ 199 + hashTable[hash1] = (U32)(ip1 - base); 200 + 201 + goto _offset; 202 + } 203 + 204 + /* lookup ip[1] */ 205 + matchIdx = hashTable[hash1]; 206 + 207 + /* hash ip[2] */ 208 + hash0 = hash1; 209 + hash1 = ZSTD_hashPtr(ip2, hlog, mls); 210 + 211 + /* advance to next positions */ 212 + ip0 = ip1; 213 + ip1 = ip2; 214 + ip2 = ip3; 215 + 216 + /* write back hash table entry */ 217 + current0 = (U32)(ip0 - base); 218 + hashTable[hash0] = current0; 219 + 220 + if (matchFound(ip0, base + matchIdx, matchIdx, prefixStartIndex)) { 221 + /* Write next hash table entry, since it's already calculated */ 222 + if (step <= 4) { 223 + /* Avoid writing an index if it's >= position where search will resume. 224 + * The minimum possible match has length 4, so search can resume at ip0 + 4. 
225 + */ 226 + hashTable[hash1] = (U32)(ip1 - base); 227 + } 228 + goto _offset; 229 + } 230 + 231 + /* lookup ip[1] */ 232 + matchIdx = hashTable[hash1]; 233 + 234 + /* hash ip[2] */ 235 + hash0 = hash1; 236 + hash1 = ZSTD_hashPtr(ip2, hlog, mls); 237 + 238 + /* advance to next positions */ 239 + ip0 = ip1; 240 + ip1 = ip2; 241 + ip2 = ip0 + step; 242 + ip3 = ip1 + step; 243 + 244 + /* calculate step */ 245 + if (ip2 >= nextStep) { 246 + step++; 247 + PREFETCH_L1(ip1 + 64); 248 + PREFETCH_L1(ip1 + 128); 249 + nextStep += kStepIncr; 250 + } 251 + } while (ip3 < ilimit); 252 + 253 + _cleanup: 254 + /* Note that there are probably still a couple positions one could search. 255 + * However, it seems to be a meaningful performance hit to try to search 256 + * them. So let's not. */ 257 + 258 + /* When the repcodes are outside of the prefix, we set them to zero before the loop. 259 + * When the offsets are still zero, we need to restore them after the block to have a correct 260 + * repcode history. If only one offset was invalid, it is easy. The tricky case is when both 261 + * offsets were invalid. We need to figure out which offset to refill with. 262 + * - If both offsets are zero they are in the same order. 263 + * - If both offsets are non-zero, we won't restore the offsets from `offsetSaved[12]`. 264 + * - If only one is zero, we need to decide which offset to restore. 265 + * - If rep_offset1 is non-zero, then rep_offset2 must be offsetSaved1. 266 + * - It is impossible for rep_offset2 to be non-zero. 267 + * 268 + * So if rep_offset1 started invalid (offsetSaved1 != 0) and became valid (rep_offset1 != 0), then 269 + * set rep[0] = rep_offset1 and rep[1] = offsetSaved1. 270 + */ 271 + offsetSaved2 = ((offsetSaved1 != 0) && (rep_offset1 != 0)) ? offsetSaved1 : offsetSaved2; 272 + 273 + /* save reps for next block */ 274 + rep[0] = rep_offset1 ? rep_offset1 : offsetSaved1; 275 + rep[1] = rep_offset2 ? 
rep_offset2 : offsetSaved2; 276 + 277 + /* Return the last literals size */ 278 + return (size_t)(iend - anchor); 279 + 280 + _offset: /* Requires: ip0, matchIdx */ 281 + 282 + /* Compute the offset code. */ 283 + match0 = base + matchIdx; 284 + rep_offset2 = rep_offset1; 285 + rep_offset1 = (U32)(ip0-match0); 286 + offcode = OFFSET_TO_OFFBASE(rep_offset1); 287 + mLength = 4; 288 + 289 + /* Count the backwards match length. */ 290 + while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) { 291 + ip0--; 292 + match0--; 293 + mLength++; 294 + } 295 + 296 + _match: /* Requires: ip0, match0, offcode */ 297 + 298 + /* Count the forward length. */ 299 + mLength += ZSTD_count(ip0 + mLength, match0 + mLength, iend); 300 + 301 + ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength); 302 + 303 + ip0 += mLength; 304 + anchor = ip0; 305 + 306 + /* Fill table and check for immediate repcode. */ 307 + if (ip0 <= ilimit) { 308 + /* Fill Table */ 309 + assert(base+current0+2 > istart); /* check base overflow */ 310 + hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ 311 + hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); 312 + 313 + if (rep_offset2 > 0) { /* rep_offset2==0 means rep_offset2 is invalidated */ 314 + while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - rep_offset2)) ) { 315 + /* store sequence */ 316 + size_t const rLength = ZSTD_count(ip0+4, ip0+4-rep_offset2, iend) + 4; 317 + { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */ 318 + hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); 319 + ip0 += rLength; 320 + ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, REPCODE1_TO_OFFBASE, rLength); 321 + anchor = ip0; 322 + continue; /* faster when present (confirmed on gcc-8) ... (?) 
*/ 323 + } } } 324 + 325 + goto _start; 326 + } 327 + 328 + #define ZSTD_GEN_FAST_FN(dictMode, mml, cmov) \ 329 + static size_t ZSTD_compressBlock_fast_##dictMode##_##mml##_##cmov( \ 330 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ 331 + void const* src, size_t srcSize) \ 332 + { \ 333 + return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mml, cmov); \ 334 + } 335 + 336 + ZSTD_GEN_FAST_FN(noDict, 4, 1) 337 + ZSTD_GEN_FAST_FN(noDict, 5, 1) 338 + ZSTD_GEN_FAST_FN(noDict, 6, 1) 339 + ZSTD_GEN_FAST_FN(noDict, 7, 1) 340 + 341 + ZSTD_GEN_FAST_FN(noDict, 4, 0) 342 + ZSTD_GEN_FAST_FN(noDict, 5, 0) 343 + ZSTD_GEN_FAST_FN(noDict, 6, 0) 344 + ZSTD_GEN_FAST_FN(noDict, 7, 0) 345 + 346 + size_t ZSTD_compressBlock_fast( 347 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 348 + void const* src, size_t srcSize) 349 + { 350 + U32 const mml = ms->cParams.minMatch; 351 + /* use cmov when "candidate in range" branch is likely unpredictable */ 352 + int const useCmov = ms->cParams.windowLog < 19; 353 + assert(ms->dictMatchState == NULL); 354 + if (useCmov) { 355 + switch(mml) 356 + { 357 + default: /* includes case 3 */ 358 + case 4 : 359 + return ZSTD_compressBlock_fast_noDict_4_1(ms, seqStore, rep, src, srcSize); 360 + case 5 : 361 + return ZSTD_compressBlock_fast_noDict_5_1(ms, seqStore, rep, src, srcSize); 362 + case 6 : 363 + return ZSTD_compressBlock_fast_noDict_6_1(ms, seqStore, rep, src, srcSize); 364 + case 7 : 365 + return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize); 366 + } 367 + } else { 368 + /* use a branch instead */ 369 + switch(mml) 370 + { 371 + default: /* includes case 3 */ 372 + case 4 : 373 + return ZSTD_compressBlock_fast_noDict_4_0(ms, seqStore, rep, src, srcSize); 374 + case 5 : 375 + return ZSTD_compressBlock_fast_noDict_5_0(ms, seqStore, rep, src, srcSize); 376 + case 6 : 377 + return ZSTD_compressBlock_fast_noDict_6_0(ms, seqStore, rep, src, srcSize); 378 
+ case 7 : 379 + return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize); 380 + } 381 + } 382 + } 383 + 384 + FORCE_INLINE_TEMPLATE 385 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 386 + size_t ZSTD_compressBlock_fast_dictMatchState_generic( 387 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 388 + void const* src, size_t srcSize, U32 const mls, U32 const hasStep) 389 + { 390 + const ZSTD_compressionParameters* const cParams = &ms->cParams; 391 + U32* const hashTable = ms->hashTable; 392 + U32 const hlog = cParams->hashLog; 393 + /* support stepSize of 0 */ 394 + U32 const stepSize = cParams->targetLength + !(cParams->targetLength); 395 + const BYTE* const base = ms->window.base; 396 + const BYTE* const istart = (const BYTE*)src; 397 + const BYTE* ip0 = istart; 398 + const BYTE* ip1 = ip0 + stepSize; /* we assert below that stepSize >= 1 */ 399 + const BYTE* anchor = istart; 400 + const U32 prefixStartIndex = ms->window.dictLimit; 401 + const BYTE* const prefixStart = base + prefixStartIndex; 402 + const BYTE* const iend = istart + srcSize; 403 + const BYTE* const ilimit = iend - HASH_READ_SIZE; 404 + U32 offset_1=rep[0], offset_2=rep[1]; 405 + 406 + const ZSTD_MatchState_t* const dms = ms->dictMatchState; 407 + const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; 408 + const U32* const dictHashTable = dms->hashTable; 409 + const U32 dictStartIndex = dms->window.dictLimit; 410 + const BYTE* const dictBase = dms->window.base; 411 + const BYTE* const dictStart = dictBase + dictStartIndex; 412 + const BYTE* const dictEnd = dms->window.nextSrc; 413 + const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); 414 + const U32 dictAndPrefixLength = (U32)(istart - prefixStart + dictEnd - dictStart); 415 + const U32 dictHBits = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; 416 + 417 + /* if a dictionary is still attached, it necessarily means that 418 + * it is within window size. So we just check it. 
*/ 419 + const U32 maxDistance = 1U << cParams->windowLog; 420 + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); 421 + assert(endIndex - prefixStartIndex <= maxDistance); 422 + (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ 423 + 424 + (void)hasStep; /* not currently specialized on whether it's accelerated */ 425 + 426 + /* ensure there will be no underflow 427 + * when translating a dict index into a local index */ 428 + assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); 429 + 430 + if (ms->prefetchCDictTables) { 431 + size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32); 432 + PREFETCH_AREA(dictHashTable, hashTableBytes); 433 + } 434 + 435 + /* init */ 436 + DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); 437 + ip0 += (dictAndPrefixLength == 0); 438 + /* dictMatchState repCode checks don't currently handle repCode == 0 439 + * disabling. */ 440 + assert(offset_1 <= dictAndPrefixLength); 441 + assert(offset_2 <= dictAndPrefixLength); 442 + 443 + /* Outer search loop */ 444 + assert(stepSize >= 1); 445 + while (ip1 <= ilimit) { /* repcode check at (ip0 + 1) is safe because ip0 < ip1 */ 446 + size_t mLength; 447 + size_t hash0 = ZSTD_hashPtr(ip0, hlog, mls); 448 + 449 + size_t const dictHashAndTag0 = ZSTD_hashPtr(ip0, dictHBits, mls); 450 + U32 dictMatchIndexAndTag = dictHashTable[dictHashAndTag0 >> ZSTD_SHORT_CACHE_TAG_BITS]; 451 + int dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag0); 452 + 453 + U32 matchIndex = hashTable[hash0]; 454 + U32 curr = (U32)(ip0 - base); 455 + size_t step = stepSize; 456 + const size_t kStepIncr = 1 << kSearchStrength; 457 + const BYTE* nextStep = ip0 + kStepIncr; 458 + 459 + /* Inner search loop */ 460 + while (1) { 461 + const BYTE* match = base + matchIndex; 462 + const U32 repIndex = curr + 1 - offset_1; 463 + const BYTE* repMatch = (repIndex < prefixStartIndex) ? 
464 + dictBase + (repIndex - dictIndexDelta) : 465 + base + repIndex; 466 + const size_t hash1 = ZSTD_hashPtr(ip1, hlog, mls); 467 + size_t const dictHashAndTag1 = ZSTD_hashPtr(ip1, dictHBits, mls); 468 + hashTable[hash0] = curr; /* update hash table */ 469 + 470 + if ((ZSTD_index_overlap_check(prefixStartIndex, repIndex)) 471 + && (MEM_read32(repMatch) == MEM_read32(ip0 + 1))) { 472 + const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; 473 + mLength = ZSTD_count_2segments(ip0 + 1 + 4, repMatch + 4, iend, repMatchEnd, prefixStart) + 4; 474 + ip0++; 475 + ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); 476 + break; 477 + } 478 + 479 + if (dictTagsMatch) { 480 + /* Found a possible dict match */ 481 + const U32 dictMatchIndex = dictMatchIndexAndTag >> ZSTD_SHORT_CACHE_TAG_BITS; 482 + const BYTE* dictMatch = dictBase + dictMatchIndex; 483 + if (dictMatchIndex > dictStartIndex && 484 + MEM_read32(dictMatch) == MEM_read32(ip0)) { 485 + /* To replicate extDict parse behavior, we only use dict matches when the normal matchIndex is invalid */ 486 + if (matchIndex <= prefixStartIndex) { 487 + U32 const offset = (U32) (curr - dictMatchIndex - dictIndexDelta); 488 + mLength = ZSTD_count_2segments(ip0 + 4, dictMatch + 4, iend, dictEnd, prefixStart) + 4; 489 + while (((ip0 > anchor) & (dictMatch > dictStart)) 490 + && (ip0[-1] == dictMatch[-1])) { 491 + ip0--; 492 + dictMatch--; 493 + mLength++; 494 + } /* catch up */ 495 + offset_2 = offset_1; 496 + offset_1 = offset; 497 + ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); 498 + break; 499 + } 500 + } 501 + } 502 + 503 + if (ZSTD_match4Found_cmov(ip0, match, matchIndex, prefixStartIndex)) { 504 + /* found a regular match of size >= 4 */ 505 + U32 const offset = (U32) (ip0 - match); 506 + mLength = ZSTD_count(ip0 + 4, match + 4, iend) + 4; 507 + while (((ip0 > anchor) & (match > prefixStart)) 508 + && 
(ip0[-1] == match[-1])) { 509 + ip0--; 510 + match--; 511 + mLength++; 512 + } /* catch up */ 513 + offset_2 = offset_1; 514 + offset_1 = offset; 515 + ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); 516 + break; 517 + } 518 + 519 + /* Prepare for next iteration */ 520 + dictMatchIndexAndTag = dictHashTable[dictHashAndTag1 >> ZSTD_SHORT_CACHE_TAG_BITS]; 521 + dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag1); 522 + matchIndex = hashTable[hash1]; 523 + 524 + if (ip1 >= nextStep) { 525 + step++; 526 + nextStep += kStepIncr; 527 + } 528 + ip0 = ip1; 529 + ip1 = ip1 + step; 530 + if (ip1 > ilimit) goto _cleanup; 531 + 532 + curr = (U32)(ip0 - base); 533 + hash0 = hash1; 534 + } /* end inner search loop */ 535 + 536 + /* match found */ 537 + assert(mLength); 538 + ip0 += mLength; 539 + anchor = ip0; 540 + 541 + if (ip0 <= ilimit) { 542 + /* Fill Table */ 543 + assert(base+curr+2 > istart); /* check base overflow */ 544 + hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */ 545 + hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); 546 + 547 + /* check immediate repcode */ 548 + while (ip0 <= ilimit) { 549 + U32 const current2 = (U32)(ip0-base); 550 + U32 const repIndex2 = current2 - offset_2; 551 + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? 552 + dictBase - dictIndexDelta + repIndex2 : 553 + base + repIndex2; 554 + if ( (ZSTD_index_overlap_check(prefixStartIndex, repIndex2)) 555 + && (MEM_read32(repMatch2) == MEM_read32(ip0))) { 556 + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; 557 + size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; 558 + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ 559 + ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); 560 + hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = current2; 561 + ip0 += repLength2; 562 + anchor = ip0; 563 + continue; 564 + } 565 + break; 566 + } 567 + } 568 + 569 + /* Prepare for next iteration */ 570 + assert(ip0 == anchor); 571 + ip1 = ip0 + stepSize; 572 + } 573 + 574 + _cleanup: 575 + /* save reps for next block */ 576 + rep[0] = offset_1; 577 + rep[1] = offset_2; 578 + 579 + /* Return the last literals size */ 580 + return (size_t)(iend - anchor); 581 + } 582 + 583 + 584 + ZSTD_GEN_FAST_FN(dictMatchState, 4, 0) 585 + ZSTD_GEN_FAST_FN(dictMatchState, 5, 0) 586 + ZSTD_GEN_FAST_FN(dictMatchState, 6, 0) 587 + ZSTD_GEN_FAST_FN(dictMatchState, 7, 0) 588 + 589 + size_t ZSTD_compressBlock_fast_dictMatchState( 590 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 591 + void const* src, size_t srcSize) 592 + { 593 + U32 const mls = ms->cParams.minMatch; 594 + assert(ms->dictMatchState != NULL); 595 + switch(mls) 596 + { 597 + default: /* includes case 3 */ 598 + case 4 : 599 + return ZSTD_compressBlock_fast_dictMatchState_4_0(ms, seqStore, rep, src, srcSize); 600 + case 5 : 601 + return ZSTD_compressBlock_fast_dictMatchState_5_0(ms, seqStore, rep, src, srcSize); 602 + case 6 : 603 + return ZSTD_compressBlock_fast_dictMatchState_6_0(ms, seqStore, rep, src, srcSize); 604 + case 7 : 605 + return ZSTD_compressBlock_fast_dictMatchState_7_0(ms, seqStore, rep, src, srcSize); 606 + } 607 + } 608 + 609 + 610 + static 611 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 612 + size_t ZSTD_compressBlock_fast_extDict_generic( 613 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 614 + void const* src, size_t srcSize, U32 const mls, U32 const 
hasStep) 615 + { 616 + const ZSTD_compressionParameters* const cParams = &ms->cParams; 617 + U32* const hashTable = ms->hashTable; 618 + U32 const hlog = cParams->hashLog; 619 + /* support stepSize of 0 */ 620 + size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; 621 + const BYTE* const base = ms->window.base; 622 + const BYTE* const dictBase = ms->window.dictBase; 623 + const BYTE* const istart = (const BYTE*)src; 624 + const BYTE* anchor = istart; 625 + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); 626 + const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); 627 + const U32 dictStartIndex = lowLimit; 628 + const BYTE* const dictStart = dictBase + dictStartIndex; 629 + const U32 dictLimit = ms->window.dictLimit; 630 + const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit; 631 + const BYTE* const prefixStart = base + prefixStartIndex; 632 + const BYTE* const dictEnd = dictBase + prefixStartIndex; 633 + const BYTE* const iend = istart + srcSize; 634 + const BYTE* const ilimit = iend - 8; 635 + U32 offset_1=rep[0], offset_2=rep[1]; 636 + U32 offsetSaved1 = 0, offsetSaved2 = 0; 637 + 638 + const BYTE* ip0 = istart; 639 + const BYTE* ip1; 640 + const BYTE* ip2; 641 + const BYTE* ip3; 642 + U32 current0; 643 + 644 + 645 + size_t hash0; /* hash for ip0 */ 646 + size_t hash1; /* hash for ip1 */ 647 + U32 idx; /* match idx for ip0 */ 648 + const BYTE* idxBase; /* base pointer for idx */ 649 + 650 + U32 offcode; 651 + const BYTE* match0; 652 + size_t mLength; 653 + const BYTE* matchEnd = 0; /* initialize to avoid warning, assert != 0 later */ 654 + 655 + size_t step; 656 + const BYTE* nextStep; 657 + const size_t kStepIncr = (1 << (kSearchStrength - 1)); 658 + 659 + (void)hasStep; /* not currently specialized on whether it's accelerated */ 660 + 661 + DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); 662 + 663 + /* switch to "regular" variant if extDict is 
invalidated due to maxDistance */ 664 + if (prefixStartIndex == dictStartIndex) 665 + return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize); 666 + 667 + { U32 const curr = (U32)(ip0 - base); 668 + U32 const maxRep = curr - dictStartIndex; 669 + if (offset_2 >= maxRep) offsetSaved2 = offset_2, offset_2 = 0; 670 + if (offset_1 >= maxRep) offsetSaved1 = offset_1, offset_1 = 0; 242 671 } 243 672 244 673 /* start each op */ ··· 785 164 hash1 = ZSTD_hashPtr(ip1, hlog, mls); 786 165 787 166 idx = hashTable[hash0]; 167 + idxBase = idx < prefixStartIndex ? dictBase : base; 788 168 789 169 do { 790 - /* load repcode match for ip[2]*/ 791 - const U32 rval = MEM_read32(ip2 - rep_offset1); 170 + { /* load repcode match for ip[2] */ 171 + U32 const current2 = (U32)(ip2 - base); 172 + U32 const repIndex = current2 - offset_1; 173 + const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; 174 + U32 rval; 175 + if ( ((U32)(prefixStartIndex - repIndex) >= 4) /* intentional underflow */ 176 + & (offset_1 > 0) ) { 177 + rval = MEM_read32(repBase + repIndex); 178 + } else { 179 + rval = MEM_read32(ip2) ^ 1; /* guaranteed to not match. */ 180 + } 792 181 793 - /* write back hash table entry */ 794 - current0 = (U32)(ip0 - base); 795 - hashTable[hash0] = current0; 182 + /* write back hash table entry */ 183 + current0 = (U32)(ip0 - base); 184 + hashTable[hash0] = current0; 796 185 797 - /* check repcode at ip[2] */ 798 - if ((MEM_read32(ip2) == rval) & (rep_offset1 > 0)) { 799 - ip0 = ip2; 800 - match0 = ip0 - rep_offset1; 801 - mLength = ip0[-1] == match0[-1]; 802 - ip0 -= mLength; 803 - match0 -= mLength; 804 - offcode = STORE_REPCODE_1; 805 - mLength += 4; 806 - goto _match; 807 - } 186 + /* check repcode at ip[2] */ 187 + if (MEM_read32(ip2) == rval) { 188 + ip0 = ip2; 189 + match0 = repBase + repIndex; 190 + matchEnd = repIndex < prefixStartIndex ? 
dictEnd : iend; 191 + assert((match0 != prefixStart) & (match0 != dictStart)); 192 + mLength = ip0[-1] == match0[-1]; 193 + ip0 -= mLength; 194 + match0 -= mLength; 195 + offcode = REPCODE1_TO_OFFBASE; 196 + mLength += 4; 197 + goto _match; 198 + } } 808 199 809 - /* load match for ip[0] */ 810 - if (idx >= prefixStartIndex) { 811 - mval = MEM_read32(base + idx); 812 - } else { 813 - mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */ 814 - } 200 + { /* load match for ip[0] */ 201 + U32 const mval = idx >= dictStartIndex ? 202 + MEM_read32(idxBase + idx) : 203 + MEM_read32(ip0) ^ 1; /* guaranteed not to match */ 815 204 816 - /* check match at ip[0] */ 817 - if (MEM_read32(ip0) == mval) { 818 - /* found a match! */ 819 - goto _offset; 820 - } 205 + /* check match at ip[0] */ 206 + if (MEM_read32(ip0) == mval) { 207 + /* found a match! */ 208 + goto _offset; 209 + } } 821 210 822 211 /* lookup ip[1] */ 823 212 idx = hashTable[hash1]; 213 + idxBase = idx < prefixStartIndex ? dictBase : base; 824 214 825 215 /* hash ip[2] */ 826 216 hash0 = hash1; ··· 846 214 current0 = (U32)(ip0 - base); 847 215 hashTable[hash0] = current0; 848 216 849 - /* load match for ip[0] */ 850 - if (idx >= prefixStartIndex) { 851 - mval = MEM_read32(base + idx); 852 - } else { 853 - mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */ 854 - } 217 + { /* load match for ip[0] */ 218 + U32 const mval = idx >= dictStartIndex ? 219 + MEM_read32(idxBase + idx) : 220 + MEM_read32(ip0) ^ 1; /* guaranteed not to match */ 855 221 856 - /* check match at ip[0] */ 857 - if (MEM_read32(ip0) == mval) { 858 - /* found a match! */ 859 - goto _offset; 860 - } 222 + /* check match at ip[0] */ 223 + if (MEM_read32(ip0) == mval) { 224 + /* found a match! */ 225 + goto _offset; 226 + } } 861 227 862 228 /* lookup ip[1] */ 863 229 idx = hashTable[hash1]; 230 + idxBase = idx < prefixStartIndex ? 
dictBase : base; 864 231 865 232 /* hash ip[2] */ 866 233 hash0 = hash1; ··· 885 254 * However, it seems to be a meaningful performance hit to try to search 886 255 * them. So let's not. */ 887 256 257 + /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0), 258 + * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */ 259 + offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2; 260 + 888 261 /* save reps for next block */ 889 - rep[0] = rep_offset1 ? rep_offset1 : offsetSaved; 890 - rep[1] = rep_offset2 ? rep_offset2 : offsetSaved; 262 + rep[0] = offset_1 ? offset_1 : offsetSaved1; 263 + rep[1] = offset_2 ? offset_2 : offsetSaved2; 891 264 892 265 /* Return the last literals size */ 893 266 return (size_t)(iend - anchor); 894 267 895 - _offset: /* Requires: ip0, idx */ 268 + _offset: /* Requires: ip0, idx, idxBase */ 896 269 897 270 /* Compute the offset code. */ 898 - match0 = base + idx; 899 - rep_offset2 = rep_offset1; 900 - rep_offset1 = (U32)(ip0-match0); 901 - offcode = STORE_OFFSET(rep_offset1); 902 - mLength = 4; 271 + { U32 const offset = current0 - idx; 272 + const BYTE* const lowMatchPtr = idx < prefixStartIndex ? dictStart : prefixStart; 273 + matchEnd = idx < prefixStartIndex ? dictEnd : iend; 274 + match0 = idxBase + idx; 275 + offset_2 = offset_1; 276 + offset_1 = offset; 277 + offcode = OFFSET_TO_OFFBASE(offset); 278 + mLength = 4; 903 279 904 - /* Count the backwards match length. */ 905 - while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) { 906 - ip0--; 907 - match0--; 908 - mLength++; 909 - } 280 + /* Count the backwards match length. 
*/ 281 + while (((ip0>anchor) & (match0>lowMatchPtr)) && (ip0[-1] == match0[-1])) { 282 + ip0--; 283 + match0--; 284 + mLength++; 285 + } } 910 286 911 - _match: /* Requires: ip0, match0, offcode */ 287 + _match: /* Requires: ip0, match0, offcode, matchEnd */ 912 288 913 289 /* Count the forward length. */ 914 - mLength += ZSTD_count(ip0 + mLength, match0 + mLength, iend); 290 + assert(matchEnd != 0); 291 + mLength += ZSTD_count_2segments(ip0 + mLength, match0 + mLength, iend, matchEnd, prefixStart); 915 292 916 293 ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength); 917 294 ··· 938 299 hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ 939 300 hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); 940 301 941 - if (rep_offset2 > 0) { /* rep_offset2==0 means rep_offset2 is invalidated */ 942 - while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - rep_offset2)) ) { 943 - /* store sequence */ 944 - size_t const rLength = ZSTD_count(ip0+4, ip0+4-rep_offset2, iend) + 4; 945 - { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */ 302 + while (ip0 <= ilimit) { 303 + U32 const repIndex2 = (U32)(ip0-base) - offset_2; 304 + const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; 305 + if ( ((ZSTD_index_overlap_check(prefixStartIndex, repIndex2)) & (offset_2 > 0)) 306 + && (MEM_read32(repMatch2) == MEM_read32(ip0)) ) { 307 + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; 308 + size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; 309 + { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ 310 + ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); 946 311 hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); 947 - ip0 += rLength; 948 - ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength); 312 + ip0 += repLength2; 949 313 anchor = ip0; 950 - continue; /* faster when present (confirmed on gcc-8) ... (?) */ 951 - } } } 314 + continue; 315 + } 316 + break; 317 + } } 952 318 953 319 goto _start; 954 - } 955 - 956 - #define ZSTD_GEN_FAST_FN(dictMode, mls, step) \ 957 - static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step( \ 958 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ 959 - void const* src, size_t srcSize) \ 960 - { \ 961 - return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \ 962 - } 963 - 964 - ZSTD_GEN_FAST_FN(noDict, 4, 1) 965 - ZSTD_GEN_FAST_FN(noDict, 5, 1) 966 - ZSTD_GEN_FAST_FN(noDict, 6, 1) 967 - ZSTD_GEN_FAST_FN(noDict, 7, 1) 968 - 969 - ZSTD_GEN_FAST_FN(noDict, 4, 0) 970 - ZSTD_GEN_FAST_FN(noDict, 5, 0) 971 - ZSTD_GEN_FAST_FN(noDict, 6, 0) 972 - ZSTD_GEN_FAST_FN(noDict, 7, 0) 973 - 974 - size_t ZSTD_compressBlock_fast( 975 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 976 - void const* src, size_t srcSize) 977 - { 978 - U32 const mls = ms->cParams.minMatch; 979 - assert(ms->dictMatchState == NULL); 980 - if (ms->cParams.targetLength > 1) { 981 - switch(mls) 982 - { 983 - default: /* includes case 3 */ 984 - case 4 : 985 - return ZSTD_compressBlock_fast_noDict_4_1(ms, seqStore, rep, src, srcSize); 986 - case 5 : 987 - return ZSTD_compressBlock_fast_noDict_5_1(ms, seqStore, rep, src, srcSize); 988 - case 6 : 989 - return 
ZSTD_compressBlock_fast_noDict_6_1(ms, seqStore, rep, src, srcSize); 990 - case 7 : 991 - return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize); 992 - } 993 - } else { 994 - switch(mls) 995 - { 996 - default: /* includes case 3 */ 997 - case 4 : 998 - return ZSTD_compressBlock_fast_noDict_4_0(ms, seqStore, rep, src, srcSize); 999 - case 5 : 1000 - return ZSTD_compressBlock_fast_noDict_5_0(ms, seqStore, rep, src, srcSize); 1001 - case 6 : 1002 - return ZSTD_compressBlock_fast_noDict_6_0(ms, seqStore, rep, src, srcSize); 1003 - case 7 : 1004 - return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize); 1005 - } 1006 - 1007 - } 1008 - } 1009 - 1010 - FORCE_INLINE_TEMPLATE 1011 - size_t ZSTD_compressBlock_fast_dictMatchState_generic( 1012 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1013 - void const* src, size_t srcSize, U32 const mls, U32 const hasStep) 1014 - { 1015 - const ZSTD_compressionParameters* const cParams = &ms->cParams; 1016 - U32* const hashTable = ms->hashTable; 1017 - U32 const hlog = cParams->hashLog; 1018 - /* support stepSize of 0 */ 1019 - U32 const stepSize = cParams->targetLength + !(cParams->targetLength); 1020 - const BYTE* const base = ms->window.base; 1021 - const BYTE* const istart = (const BYTE*)src; 1022 - const BYTE* ip = istart; 1023 - const BYTE* anchor = istart; 1024 - const U32 prefixStartIndex = ms->window.dictLimit; 1025 - const BYTE* const prefixStart = base + prefixStartIndex; 1026 - const BYTE* const iend = istart + srcSize; 1027 - const BYTE* const ilimit = iend - HASH_READ_SIZE; 1028 - U32 offset_1=rep[0], offset_2=rep[1]; 1029 - U32 offsetSaved = 0; 1030 - 1031 - const ZSTD_matchState_t* const dms = ms->dictMatchState; 1032 - const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; 1033 - const U32* const dictHashTable = dms->hashTable; 1034 - const U32 dictStartIndex = dms->window.dictLimit; 1035 - const BYTE* const dictBase = dms->window.base; 1036 - 
const BYTE* const dictStart = dictBase + dictStartIndex; 1037 - const BYTE* const dictEnd = dms->window.nextSrc; 1038 - const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); 1039 - const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); 1040 - const U32 dictHLog = dictCParams->hashLog; 1041 - 1042 - /* if a dictionary is still attached, it necessarily means that 1043 - * it is within window size. So we just check it. */ 1044 - const U32 maxDistance = 1U << cParams->windowLog; 1045 - const U32 endIndex = (U32)((size_t)(ip - base) + srcSize); 1046 - assert(endIndex - prefixStartIndex <= maxDistance); 1047 - (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ 1048 - 1049 - (void)hasStep; /* not currently specialized on whether it's accelerated */ 1050 - 1051 - /* ensure there will be no underflow 1052 - * when translating a dict index into a local index */ 1053 - assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); 1054 - 1055 - /* init */ 1056 - DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); 1057 - ip += (dictAndPrefixLength == 0); 1058 - /* dictMatchState repCode checks don't currently handle repCode == 0 1059 - * disabling. */ 1060 - assert(offset_1 <= dictAndPrefixLength); 1061 - assert(offset_2 <= dictAndPrefixLength); 1062 - 1063 - /* Main Search Loop */ 1064 - while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ 1065 - size_t mLength; 1066 - size_t const h = ZSTD_hashPtr(ip, hlog, mls); 1067 - U32 const curr = (U32)(ip-base); 1068 - U32 const matchIndex = hashTable[h]; 1069 - const BYTE* match = base + matchIndex; 1070 - const U32 repIndex = curr + 1 - offset_1; 1071 - const BYTE* repMatch = (repIndex < prefixStartIndex) ? 
1072 - dictBase + (repIndex - dictIndexDelta) : 1073 - base + repIndex; 1074 - hashTable[h] = curr; /* update hash table */ 1075 - 1076 - if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ 1077 - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { 1078 - const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; 1079 - mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; 1080 - ip++; 1081 - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); 1082 - } else if ( (matchIndex <= prefixStartIndex) ) { 1083 - size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); 1084 - U32 const dictMatchIndex = dictHashTable[dictHash]; 1085 - const BYTE* dictMatch = dictBase + dictMatchIndex; 1086 - if (dictMatchIndex <= dictStartIndex || 1087 - MEM_read32(dictMatch) != MEM_read32(ip)) { 1088 - assert(stepSize >= 1); 1089 - ip += ((ip-anchor) >> kSearchStrength) + stepSize; 1090 - continue; 1091 - } else { 1092 - /* found a dict match */ 1093 - U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta); 1094 - mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; 1095 - while (((ip>anchor) & (dictMatch>dictStart)) 1096 - && (ip[-1] == dictMatch[-1])) { 1097 - ip--; dictMatch--; mLength++; 1098 - } /* catch up */ 1099 - offset_2 = offset_1; 1100 - offset_1 = offset; 1101 - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); 1102 - } 1103 - } else if (MEM_read32(match) != MEM_read32(ip)) { 1104 - /* it's not a match, and we're not going to check the dictionary */ 1105 - assert(stepSize >= 1); 1106 - ip += ((ip-anchor) >> kSearchStrength) + stepSize; 1107 - continue; 1108 - } else { 1109 - /* found a regular match */ 1110 - U32 const offset = (U32)(ip-match); 1111 - mLength = ZSTD_count(ip+4, match+4, iend) + 4; 1112 - while (((ip>anchor) & 
(match>prefixStart)) 1113 - && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ 1114 - offset_2 = offset_1; 1115 - offset_1 = offset; 1116 - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); 1117 - } 1118 - 1119 - /* match found */ 1120 - ip += mLength; 1121 - anchor = ip; 1122 - 1123 - if (ip <= ilimit) { 1124 - /* Fill Table */ 1125 - assert(base+curr+2 > istart); /* check base overflow */ 1126 - hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */ 1127 - hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); 1128 - 1129 - /* check immediate repcode */ 1130 - while (ip <= ilimit) { 1131 - U32 const current2 = (U32)(ip-base); 1132 - U32 const repIndex2 = current2 - offset_2; 1133 - const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? 1134 - dictBase - dictIndexDelta + repIndex2 : 1135 - base + repIndex2; 1136 - if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) 1137 - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { 1138 - const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; 1139 - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; 1140 - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ 1141 - ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); 1142 - hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; 1143 - ip += repLength2; 1144 - anchor = ip; 1145 - continue; 1146 - } 1147 - break; 1148 - } 1149 - } 1150 - } 1151 - 1152 - /* save reps for next block */ 1153 - rep[0] = offset_1 ? offset_1 : offsetSaved; 1154 - rep[1] = offset_2 ? 
offset_2 : offsetSaved; 1155 - 1156 - /* Return the last literals size */ 1157 - return (size_t)(iend - anchor); 1158 - } 1159 - 1160 - 1161 - ZSTD_GEN_FAST_FN(dictMatchState, 4, 0) 1162 - ZSTD_GEN_FAST_FN(dictMatchState, 5, 0) 1163 - ZSTD_GEN_FAST_FN(dictMatchState, 6, 0) 1164 - ZSTD_GEN_FAST_FN(dictMatchState, 7, 0) 1165 - 1166 - size_t ZSTD_compressBlock_fast_dictMatchState( 1167 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1168 - void const* src, size_t srcSize) 1169 - { 1170 - U32 const mls = ms->cParams.minMatch; 1171 - assert(ms->dictMatchState != NULL); 1172 - switch(mls) 1173 - { 1174 - default: /* includes case 3 */ 1175 - case 4 : 1176 - return ZSTD_compressBlock_fast_dictMatchState_4_0(ms, seqStore, rep, src, srcSize); 1177 - case 5 : 1178 - return ZSTD_compressBlock_fast_dictMatchState_5_0(ms, seqStore, rep, src, srcSize); 1179 - case 6 : 1180 - return ZSTD_compressBlock_fast_dictMatchState_6_0(ms, seqStore, rep, src, srcSize); 1181 - case 7 : 1182 - return ZSTD_compressBlock_fast_dictMatchState_7_0(ms, seqStore, rep, src, srcSize); 1183 - } 1184 - } 1185 - 1186 - 1187 - static size_t ZSTD_compressBlock_fast_extDict_generic( 1188 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1189 - void const* src, size_t srcSize, U32 const mls, U32 const hasStep) 1190 - { 1191 - const ZSTD_compressionParameters* const cParams = &ms->cParams; 1192 - U32* const hashTable = ms->hashTable; 1193 - U32 const hlog = cParams->hashLog; 1194 - /* support stepSize of 0 */ 1195 - U32 const stepSize = cParams->targetLength + !(cParams->targetLength); 1196 - const BYTE* const base = ms->window.base; 1197 - const BYTE* const dictBase = ms->window.dictBase; 1198 - const BYTE* const istart = (const BYTE*)src; 1199 - const BYTE* ip = istart; 1200 - const BYTE* anchor = istart; 1201 - const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); 1202 - const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); 1203 
- const U32 dictStartIndex = lowLimit; 1204 - const BYTE* const dictStart = dictBase + dictStartIndex; 1205 - const U32 dictLimit = ms->window.dictLimit; 1206 - const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit; 1207 - const BYTE* const prefixStart = base + prefixStartIndex; 1208 - const BYTE* const dictEnd = dictBase + prefixStartIndex; 1209 - const BYTE* const iend = istart + srcSize; 1210 - const BYTE* const ilimit = iend - 8; 1211 - U32 offset_1=rep[0], offset_2=rep[1]; 1212 - 1213 - (void)hasStep; /* not currently specialized on whether it's accelerated */ 1214 - 1215 - DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); 1216 - 1217 - /* switch to "regular" variant if extDict is invalidated due to maxDistance */ 1218 - if (prefixStartIndex == dictStartIndex) 1219 - return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize); 1220 - 1221 - /* Search Loop */ 1222 - while (ip < ilimit) { /* < instead of <=, because (ip+1) */ 1223 - const size_t h = ZSTD_hashPtr(ip, hlog, mls); 1224 - const U32 matchIndex = hashTable[h]; 1225 - const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; 1226 - const BYTE* match = matchBase + matchIndex; 1227 - const U32 curr = (U32)(ip-base); 1228 - const U32 repIndex = curr + 1 - offset_1; 1229 - const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; 1230 - const BYTE* const repMatch = repBase + repIndex; 1231 - hashTable[h] = curr; /* update hash table */ 1232 - DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr); 1233 - 1234 - if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ 1235 - & (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */ 1236 - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { 1237 - const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? 
dictEnd : iend; 1238 - size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; 1239 - ip++; 1240 - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength); 1241 - ip += rLength; 1242 - anchor = ip; 1243 - } else { 1244 - if ( (matchIndex < dictStartIndex) || 1245 - (MEM_read32(match) != MEM_read32(ip)) ) { 1246 - assert(stepSize >= 1); 1247 - ip += ((ip-anchor) >> kSearchStrength) + stepSize; 1248 - continue; 1249 - } 1250 - { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; 1251 - const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; 1252 - U32 const offset = curr - matchIndex; 1253 - size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; 1254 - while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ 1255 - offset_2 = offset_1; offset_1 = offset; /* update offset history */ 1256 - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); 1257 - ip += mLength; 1258 - anchor = ip; 1259 - } } 1260 - 1261 - if (ip <= ilimit) { 1262 - /* Fill Table */ 1263 - hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; 1264 - hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); 1265 - /* check immediate repcode */ 1266 - while (ip <= ilimit) { 1267 - U32 const current2 = (U32)(ip-base); 1268 - U32 const repIndex2 = current2 - offset_2; 1269 - const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; 1270 - if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex)) /* intentional overflow */ 1271 - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { 1272 - const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; 1273 - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; 1274 - { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ 1275 - ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2); 1276 - hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; 1277 - ip += repLength2; 1278 - anchor = ip; 1279 - continue; 1280 - } 1281 - break; 1282 - } } } 1283 - 1284 - /* save reps for next block */ 1285 - rep[0] = offset_1; 1286 - rep[1] = offset_2; 1287 - 1288 - /* Return the last literals size */ 1289 - return (size_t)(iend - anchor); 1290 320 } 1291 321 1292 322 ZSTD_GEN_FAST_FN(extDict, 4, 0) ··· 964 656 ZSTD_GEN_FAST_FN(extDict, 7, 0) 965 657 966 658 size_t ZSTD_compressBlock_fast_extDict( 967 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 659 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 968 660 void const* src, size_t srcSize) 969 661 { 970 662 U32 const mls = ms->cParams.minMatch; 663 + assert(ms->dictMatchState == NULL); 971 664 switch(mls) 972 665 { 973 666 default: /* includes case 3 */
+8 -8
lib/zstd/compress/zstd_fast.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 12 11 #ifndef ZSTD_FAST_H 13 12 #define ZSTD_FAST_H 14 13 15 - 16 14 #include "../common/mem.h" /* U32 */ 17 15 #include "zstd_compress_internal.h" 18 16 19 - void ZSTD_fillHashTable(ZSTD_matchState_t* ms, 20 - void const* end, ZSTD_dictTableLoadMethod_e dtlm); 17 + void ZSTD_fillHashTable(ZSTD_MatchState_t* ms, 18 + void const* end, ZSTD_dictTableLoadMethod_e dtlm, 19 + ZSTD_tableFillPurpose_e tfp); 21 20 size_t ZSTD_compressBlock_fast( 22 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 21 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 23 22 void const* src, size_t srcSize); 24 23 size_t ZSTD_compressBlock_fast_dictMatchState( 25 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 24 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 26 25 void const* src, size_t srcSize); 27 26 size_t ZSTD_compressBlock_fast_extDict( 28 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 27 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 29 28 void const* src, size_t srcSize); 30 - 31 29 32 30 #endif /* ZSTD_FAST_H */
+521 -423
lib/zstd/compress/zstd_lazy.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 11 10 12 11 #include "zstd_compress_internal.h" 13 12 #include "zstd_lazy.h" 13 + #include "../common/bits.h" /* ZSTD_countTrailingZeros64 */ 14 + 15 + #if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \ 16 + || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \ 17 + || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \ 18 + || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) 19 + 20 + #define kLazySkippingStep 8 14 21 15 22 16 23 /*-************************************* 17 24 * Binary Tree search 18 25 ***************************************/ 19 26 20 - static void 21 - ZSTD_updateDUBT(ZSTD_matchState_t* ms, 27 + static 28 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 29 + void ZSTD_updateDUBT(ZSTD_MatchState_t* ms, 22 30 const BYTE* ip, const BYTE* iend, 23 31 U32 mls) 24 32 { ··· 70 60 * sort one already inserted but unsorted position 71 61 * assumption : curr >= btlow == (curr - btmask) 72 62 * doesn't fail */ 73 - static void 74 - ZSTD_insertDUBT1(const ZSTD_matchState_t* ms, 63 + static 64 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 65 + void ZSTD_insertDUBT1(const ZSTD_MatchState_t* ms, 75 66 U32 curr, const BYTE* inputEnd, 76 67 U32 nbCompares, U32 btLow, 77 68 const ZSTD_dictMode_e dictMode) ··· 160 149 } 161 150 162 151 163 - static size_t 164 - ZSTD_DUBT_findBetterDictMatch ( 165 - const ZSTD_matchState_t* ms, 152 + static 153 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 154 + size_t ZSTD_DUBT_findBetterDictMatch ( 155 + const ZSTD_MatchState_t* ms, 166 156 const BYTE* const ip, const BYTE* const iend, 167 157 size_t* offsetPtr, 168 158 size_t bestLength, ··· 171 159 U32 const mls, 172 160 const ZSTD_dictMode_e dictMode) 173 161 { 174 - const ZSTD_matchState_t * const dms = ms->dictMatchState; 162 + 
const ZSTD_MatchState_t * const dms = ms->dictMatchState; 175 163 const ZSTD_compressionParameters* const dmsCParams = &dms->cParams; 176 164 const U32 * const dictHashTable = dms->hashTable; 177 165 U32 const hashLog = dmsCParams->hashLog; ··· 209 197 U32 matchIndex = dictMatchIndex + dictIndexDelta; 210 198 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { 211 199 DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)", 212 - curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, STORE_OFFSET(curr - matchIndex), dictMatchIndex, matchIndex); 213 - bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex); 200 + curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, OFFSET_TO_OFFBASE(curr - matchIndex), dictMatchIndex, matchIndex); 201 + bestLength = matchLength, *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex); 214 202 } 215 203 if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */ 216 204 break; /* drop, to guarantee consistency (miss a little bit of compression) */ ··· 230 218 } 231 219 232 220 if (bestLength >= MINMATCH) { 233 - U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex; 221 + U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offsetPtr); (void)mIndex; 234 222 DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)", 235 223 curr, (U32)bestLength, (U32)*offsetPtr, mIndex); 236 224 } ··· 239 227 } 240 228 241 229 242 - static size_t 243 - ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, 230 + static 231 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 232 + size_t ZSTD_DUBT_findBestMatch(ZSTD_MatchState_t* ms, 244 233 const BYTE* const ip, const BYTE* const iend, 245 - size_t* offsetPtr, 234 + size_t* offBasePtr, 246 235 U32 const mls, 247 
236 const ZSTD_dictMode_e dictMode) 248 237 { ··· 340 327 if (matchLength > bestLength) { 341 328 if (matchLength > matchEndIdx - matchIndex) 342 329 matchEndIdx = matchIndex + (U32)matchLength; 343 - if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) 344 - bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex); 330 + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr - matchIndex + 1) - ZSTD_highbit32((U32)*offBasePtr)) ) 331 + bestLength = matchLength, *offBasePtr = OFFSET_TO_OFFBASE(curr - matchIndex); 345 332 if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ 346 333 if (dictMode == ZSTD_dictMatchState) { 347 334 nbCompares = 0; /* in addition to avoiding checking any ··· 374 361 if (dictMode == ZSTD_dictMatchState && nbCompares) { 375 362 bestLength = ZSTD_DUBT_findBetterDictMatch( 376 363 ms, ip, iend, 377 - offsetPtr, bestLength, nbCompares, 364 + offBasePtr, bestLength, nbCompares, 378 365 mls, dictMode); 379 366 } 380 367 381 368 assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */ 382 369 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ 383 370 if (bestLength >= MINMATCH) { 384 - U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex; 371 + U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offBasePtr); (void)mIndex; 385 372 DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", 386 - curr, (U32)bestLength, (U32)*offsetPtr, mIndex); 373 + curr, (U32)bestLength, (U32)*offBasePtr, mIndex); 387 374 } 388 375 return bestLength; 389 376 } ··· 391 378 392 379 393 380 /* ZSTD_BtFindBestMatch() : Tree updater, providing best match */ 394 - FORCE_INLINE_TEMPLATE size_t 395 - ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, 381 + FORCE_INLINE_TEMPLATE 382 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 383 + size_t ZSTD_BtFindBestMatch( ZSTD_MatchState_t* ms, 396 384 
const BYTE* const ip, const BYTE* const iLimit, 397 - size_t* offsetPtr, 385 + size_t* offBasePtr, 398 386 const U32 mls /* template */, 399 387 const ZSTD_dictMode_e dictMode) 400 388 { 401 389 DEBUGLOG(7, "ZSTD_BtFindBestMatch"); 402 390 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ 403 391 ZSTD_updateDUBT(ms, ip, iLimit, mls); 404 - return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode); 392 + return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offBasePtr, mls, dictMode); 405 393 } 406 394 407 395 /* ********************************* 408 396 * Dedicated dict search 409 397 ***********************************/ 410 398 411 - void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip) 399 + void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_MatchState_t* ms, const BYTE* const ip) 412 400 { 413 401 const BYTE* const base = ms->window.base; 414 402 U32 const target = (U32)(ip - base); ··· 528 514 */ 529 515 FORCE_INLINE_TEMPLATE 530 516 size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nbAttempts, 531 - const ZSTD_matchState_t* const dms, 517 + const ZSTD_MatchState_t* const dms, 532 518 const BYTE* const ip, const BYTE* const iLimit, 533 519 const BYTE* const prefixStart, const U32 curr, 534 520 const U32 dictLimit, const size_t ddsIdx) { ··· 575 561 /* save best solution */ 576 562 if (currentMl > ml) { 577 563 ml = currentMl; 578 - *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta)); 564 + *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta)); 579 565 if (ip+currentMl == iLimit) { 580 566 /* best possible, avoids read overflow on next attempt */ 581 567 return ml; ··· 612 598 /* save best solution */ 613 599 if (currentMl > ml) { 614 600 ml = currentMl; 615 - *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta)); 601 + *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta)); 616 602 if (ip+currentMl == iLimit) 
break; /* best possible, avoids read overflow on next attempt */ 617 603 } 618 604 } ··· 628 614 629 615 /* Update chains up to ip (excluded) 630 616 Assumption : always within prefix (i.e. not within extDict) */ 631 - FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( 632 - ZSTD_matchState_t* ms, 617 + FORCE_INLINE_TEMPLATE 618 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 619 + U32 ZSTD_insertAndFindFirstIndex_internal( 620 + ZSTD_MatchState_t* ms, 633 621 const ZSTD_compressionParameters* const cParams, 634 - const BYTE* ip, U32 const mls) 622 + const BYTE* ip, U32 const mls, U32 const lazySkipping) 635 623 { 636 624 U32* const hashTable = ms->hashTable; 637 625 const U32 hashLog = cParams->hashLog; ··· 648 632 NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; 649 633 hashTable[h] = idx; 650 634 idx++; 635 + /* Stop inserting every position when in the lazy skipping mode. */ 636 + if (lazySkipping) 637 + break; 651 638 } 652 639 653 640 ms->nextToUpdate = target; 654 641 return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; 655 642 } 656 643 657 - U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { 644 + U32 ZSTD_insertAndFindFirstIndex(ZSTD_MatchState_t* ms, const BYTE* ip) { 658 645 const ZSTD_compressionParameters* const cParams = &ms->cParams; 659 - return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); 646 + return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch, /* lazySkipping*/ 0); 660 647 } 661 648 662 649 /* inlining is important to hardwire a hot branch (template emulation) */ 663 650 FORCE_INLINE_TEMPLATE 651 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 664 652 size_t ZSTD_HcFindBestMatch( 665 - ZSTD_matchState_t* ms, 653 + ZSTD_MatchState_t* ms, 666 654 const BYTE* const ip, const BYTE* const iLimit, 667 655 size_t* offsetPtr, 668 656 const U32 mls, const ZSTD_dictMode_e dictMode) ··· 690 670 U32 nbAttempts = 1U << cParams->searchLog; 691 671 size_t ml=4-1; 692 672 693 - const 
ZSTD_matchState_t* const dms = ms->dictMatchState; 673 + const ZSTD_MatchState_t* const dms = ms->dictMatchState; 694 674 const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch 695 675 ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0; 696 676 const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch ··· 704 684 } 705 685 706 686 /* HC4 match finder */ 707 - matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); 687 + matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls, ms->lazySkipping); 708 688 709 689 for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) { 710 690 size_t currentMl=0; 711 691 if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { 712 692 const BYTE* const match = base + matchIndex; 713 693 assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ 714 - if (match[ml] == ip[ml]) /* potentially better */ 694 + /* read 4B starting from (match + ml + 1 - sizeof(U32)) */ 695 + if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3)) /* potentially better */ 715 696 currentMl = ZSTD_count(ip, match, iLimit); 716 697 } else { 717 698 const BYTE* const match = dictBase + matchIndex; ··· 724 703 /* save best solution */ 725 704 if (currentMl > ml) { 726 705 ml = currentMl; 727 - *offsetPtr = STORE_OFFSET(curr - matchIndex); 706 + *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex); 728 707 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ 729 708 } 730 709 ··· 760 739 if (currentMl > ml) { 761 740 ml = currentMl; 762 741 assert(curr > matchIndex + dmsIndexDelta); 763 - *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta)); 742 + *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta)); 764 743 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ 765 744 } 766 745 ··· 777 756 * (SIMD) Row-based matchfinder 778 757 ***********************************/ 779 758 /* 
Constants for row-based hash */ 780 - #define ZSTD_ROW_HASH_TAG_OFFSET 16 /* byte offset of hashes in the match state's tagTable from the beginning of a row */ 781 - #define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */ 782 759 #define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1) 783 760 #define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */ 784 761 ··· 788 769 * Starting from the LSB, returns the idx of the next non-zero bit. 789 770 * Basically counting the nb of trailing zeroes. 790 771 */ 791 - static U32 ZSTD_VecMask_next(ZSTD_VecMask val) { 792 - assert(val != 0); 793 - # if (defined(__GNUC__) && ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4)))) 794 - if (sizeof(size_t) == 4) { 795 - U32 mostSignificantWord = (U32)(val >> 32); 796 - U32 leastSignificantWord = (U32)val; 797 - if (leastSignificantWord == 0) { 798 - return 32 + (U32)__builtin_ctz(mostSignificantWord); 799 - } else { 800 - return (U32)__builtin_ctz(leastSignificantWord); 801 - } 802 - } else { 803 - return (U32)__builtin_ctzll(val); 804 - } 805 - # else 806 - /* Software ctz version: http://aggregate.org/MAGIC/#Trailing%20Zero%20Count 807 - * and: https://stackoverflow.com/questions/2709430/count-number-of-bits-in-a-64-bit-long-big-integer 808 - */ 809 - val = ~val & (val - 1ULL); /* Lowest set bit mask */ 810 - val = val - ((val >> 1) & 0x5555555555555555); 811 - val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL); 812 - return (U32)((((val + (val >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56); 813 - # endif 814 - } 815 - 816 - /* ZSTD_rotateRight_*(): 817 - * Rotates a bitfield to the right by "count" bits. 
818 - * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts 819 - */ 820 - FORCE_INLINE_TEMPLATE 821 - U64 ZSTD_rotateRight_U64(U64 const value, U32 count) { 822 - assert(count < 64); 823 - count &= 0x3F; /* for fickle pattern recognition */ 824 - return (value >> count) | (U64)(value << ((0U - count) & 0x3F)); 825 - } 826 - 827 - FORCE_INLINE_TEMPLATE 828 - U32 ZSTD_rotateRight_U32(U32 const value, U32 count) { 829 - assert(count < 32); 830 - count &= 0x1F; /* for fickle pattern recognition */ 831 - return (value >> count) | (U32)(value << ((0U - count) & 0x1F)); 832 - } 833 - 834 - FORCE_INLINE_TEMPLATE 835 - U16 ZSTD_rotateRight_U16(U16 const value, U32 count) { 836 - assert(count < 16); 837 - count &= 0x0F; /* for fickle pattern recognition */ 838 - return (value >> count) | (U16)(value << ((0U - count) & 0x0F)); 772 + MEM_STATIC U32 ZSTD_VecMask_next(ZSTD_VecMask val) { 773 + return ZSTD_countTrailingZeros64(val); 839 774 } 840 775 841 776 /* ZSTD_row_nextIndex(): 842 777 * Returns the next index to insert at within a tagTable row, and updates the "head" 843 - * value to reflect the update. Essentially cycles backwards from [0, {entries per row}) 778 + * value to reflect the update. Essentially cycles backwards from [1, {entries per row}) 844 779 */ 845 780 FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) { 846 - U32 const next = (*tagRow - 1) & rowMask; 847 - *tagRow = (BYTE)next; 848 - return next; 781 + U32 next = (*tagRow-1) & rowMask; 782 + next += (next == 0) ? rowMask : 0; /* skip first position */ 783 + *tagRow = (BYTE)next; 784 + return next; 849 785 } 850 786 851 787 /* ZSTD_isAligned(): ··· 814 840 /* ZSTD_row_prefetch(): 815 841 * Performs prefetching for the hashTable and tagTable at a given row. 
816 842 */ 817 - FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) { 843 + FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* tagTable, U32 const relRow, U32 const rowLog) { 818 844 PREFETCH_L1(hashTable + relRow); 819 845 if (rowLog >= 5) { 820 846 PREFETCH_L1(hashTable + relRow + 16); ··· 833 859 * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries, 834 860 * but not beyond iLimit. 835 861 */ 836 - FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base, 862 + FORCE_INLINE_TEMPLATE 863 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 864 + void ZSTD_row_fillHashCache(ZSTD_MatchState_t* ms, const BYTE* base, 837 865 U32 const rowLog, U32 const mls, 838 866 U32 idx, const BYTE* const iLimit) 839 867 { 840 868 U32 const* const hashTable = ms->hashTable; 841 - U16 const* const tagTable = ms->tagTable; 869 + BYTE const* const tagTable = ms->tagTable; 842 870 U32 const hashLog = ms->rowHashLog; 843 871 U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1); 844 872 U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch); 845 873 846 874 for (; idx < lim; ++idx) { 847 - U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); 875 + U32 const hash = (U32)ZSTD_hashPtrSalted(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt); 848 876 U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; 849 877 ZSTD_row_prefetch(hashTable, tagTable, row, rowLog); 850 878 ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash; ··· 861 885 * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at 862 886 * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable. 
863 887 */ 864 - FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable, 865 - U16 const* tagTable, BYTE const* base, 888 + FORCE_INLINE_TEMPLATE 889 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 890 + U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable, 891 + BYTE const* tagTable, BYTE const* base, 866 892 U32 idx, U32 const hashLog, 867 - U32 const rowLog, U32 const mls) 893 + U32 const rowLog, U32 const mls, 894 + U64 const hashSalt) 868 895 { 869 - U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); 896 + U32 const newHash = (U32)ZSTD_hashPtrSalted(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt); 870 897 U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; 871 898 ZSTD_row_prefetch(hashTable, tagTable, row, rowLog); 872 899 { U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK]; ··· 881 902 /* ZSTD_row_update_internalImpl(): 882 903 * Updates the hash table with positions starting from updateStartIdx until updateEndIdx. 
883 904 */ 884 - FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms, 885 - U32 updateStartIdx, U32 const updateEndIdx, 886 - U32 const mls, U32 const rowLog, 887 - U32 const rowMask, U32 const useCache) 905 + FORCE_INLINE_TEMPLATE 906 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 907 + void ZSTD_row_update_internalImpl(ZSTD_MatchState_t* ms, 908 + U32 updateStartIdx, U32 const updateEndIdx, 909 + U32 const mls, U32 const rowLog, 910 + U32 const rowMask, U32 const useCache) 888 911 { 889 912 U32* const hashTable = ms->hashTable; 890 - U16* const tagTable = ms->tagTable; 913 + BYTE* const tagTable = ms->tagTable; 891 914 U32 const hashLog = ms->rowHashLog; 892 915 const BYTE* const base = ms->window.base; 893 916 894 917 DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx); 895 918 for (; updateStartIdx < updateEndIdx; ++updateStartIdx) { 896 - U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls) 897 - : (U32)ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); 919 + U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls, ms->hashSalt) 920 + : (U32)ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt); 898 921 U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; 899 922 U32* const row = hashTable + relRow; 900 - BYTE* tagRow = (BYTE*)(tagTable + relRow); /* Though tagTable is laid out as a table of U16, each tag is only 1 byte. 
901 - Explicit cast allows us to get exact desired position within each row */ 923 + BYTE* tagRow = tagTable + relRow; 902 924 U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask); 903 925 904 - assert(hash == ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls)); 905 - ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK; 926 + assert(hash == ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt)); 927 + tagRow[pos] = hash & ZSTD_ROW_HASH_TAG_MASK; 906 928 row[pos] = updateStartIdx; 907 929 } 908 930 } ··· 912 932 * Inserts the byte at ip into the appropriate position in the hash table, and updates ms->nextToUpdate. 913 933 * Skips sections of long matches as is necessary. 914 934 */ 915 - FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip, 916 - U32 const mls, U32 const rowLog, 917 - U32 const rowMask, U32 const useCache) 935 + FORCE_INLINE_TEMPLATE 936 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 937 + void ZSTD_row_update_internal(ZSTD_MatchState_t* ms, const BYTE* ip, 938 + U32 const mls, U32 const rowLog, 939 + U32 const rowMask, U32 const useCache) 918 940 { 919 941 U32 idx = ms->nextToUpdate; 920 942 const BYTE* const base = ms->window.base; ··· 947 965 * External wrapper for ZSTD_row_update_internal(). Used for filling the hashtable during dictionary 948 966 * processing. 
949 967 */ 950 - void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) { 968 + void ZSTD_row_update(ZSTD_MatchState_t* const ms, const BYTE* ip) { 951 969 const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6); 952 970 const U32 rowMask = (1u << rowLog) - 1; 953 971 const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */); 954 972 955 973 DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog); 956 - ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* dont use cache */); 974 + ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* don't use cache */); 975 + } 976 + 977 + /* Returns the mask width of bits group of which will be set to 1. Given not all 978 + * architectures have easy movemask instruction, this helps to iterate over 979 + * groups of bits easier and faster. 980 + */ 981 + FORCE_INLINE_TEMPLATE U32 982 + ZSTD_row_matchMaskGroupWidth(const U32 rowEntries) 983 + { 984 + assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64); 985 + assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES); 986 + (void)rowEntries; 987 + #if defined(ZSTD_ARCH_ARM_NEON) 988 + /* NEON path only works for little endian */ 989 + if (!MEM_isLittleEndian()) { 990 + return 1; 991 + } 992 + if (rowEntries == 16) { 993 + return 4; 994 + } 995 + if (rowEntries == 32) { 996 + return 2; 997 + } 998 + if (rowEntries == 64) { 999 + return 1; 1000 + } 1001 + #endif 1002 + return 1; 957 1003 } 958 1004 959 1005 #if defined(ZSTD_ARCH_X86_SSE2) ··· 1004 994 } 1005 995 #endif 1006 996 1007 - /* Returns a ZSTD_VecMask (U32) that has the nth bit set to 1 if the newly-computed "tag" matches 1008 - * the hash at the nth position in a row of the tagTable. 1009 - * Each row is a circular buffer beginning at the value of "head". 
So we must rotate the "matches" bitfield 1010 - * to match up with the actual layout of the entries within the hashTable */ 997 + #if defined(ZSTD_ARCH_ARM_NEON) 1011 998 FORCE_INLINE_TEMPLATE ZSTD_VecMask 1012 - ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head, const U32 rowEntries) 999 + ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag, const U32 headGrouped) 1013 1000 { 1014 - const BYTE* const src = tagRow + ZSTD_ROW_HASH_TAG_OFFSET; 1001 + assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64); 1002 + if (rowEntries == 16) { 1003 + /* vshrn_n_u16 shifts by 4 every u16 and narrows to 8 lower bits. 1004 + * After that groups of 4 bits represent the equalMask. We lower 1005 + * all bits except the highest in these groups by doing AND with 1006 + * 0x88 = 0b10001000. 1007 + */ 1008 + const uint8x16_t chunk = vld1q_u8(src); 1009 + const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag))); 1010 + const uint8x8_t res = vshrn_n_u16(equalMask, 4); 1011 + const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0); 1012 + return ZSTD_rotateRight_U64(matches, headGrouped) & 0x8888888888888888ull; 1013 + } else if (rowEntries == 32) { 1014 + /* Same idea as with rowEntries == 16 but doing AND with 1015 + * 0x55 = 0b01010101. 
1016 + */ 1017 + const uint16x8x2_t chunk = vld2q_u16((const uint16_t*)(const void*)src); 1018 + const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]); 1019 + const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]); 1020 + const uint8x16_t dup = vdupq_n_u8(tag); 1021 + const uint8x8_t t0 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk0, dup)), 6); 1022 + const uint8x8_t t1 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk1, dup)), 6); 1023 + const uint8x8_t res = vsli_n_u8(t0, t1, 4); 1024 + const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0) ; 1025 + return ZSTD_rotateRight_U64(matches, headGrouped) & 0x5555555555555555ull; 1026 + } else { /* rowEntries == 64 */ 1027 + const uint8x16x4_t chunk = vld4q_u8(src); 1028 + const uint8x16_t dup = vdupq_n_u8(tag); 1029 + const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup); 1030 + const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup); 1031 + const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup); 1032 + const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup); 1033 + 1034 + const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1); 1035 + const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1); 1036 + const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2); 1037 + const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4); 1038 + const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4); 1039 + const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0); 1040 + return ZSTD_rotateRight_U64(matches, headGrouped); 1041 + } 1042 + } 1043 + #endif 1044 + 1045 + /* Returns a ZSTD_VecMask (U64) that has the nth group (determined by 1046 + * ZSTD_row_matchMaskGroupWidth) of bits set to 1 if the newly-computed "tag" 1047 + * matches the hash at the nth position in a row of the tagTable. 1048 + * Each row is a circular buffer beginning at the value of "headGrouped". 
So we 1049 + * must rotate the "matches" bitfield to match up with the actual layout of the 1050 + * entries within the hashTable */ 1051 + FORCE_INLINE_TEMPLATE ZSTD_VecMask 1052 + ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGrouped, const U32 rowEntries) 1053 + { 1054 + const BYTE* const src = tagRow; 1015 1055 assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64); 1016 1056 assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES); 1057 + assert(ZSTD_row_matchMaskGroupWidth(rowEntries) * rowEntries <= sizeof(ZSTD_VecMask) * 8); 1017 1058 1018 1059 #if defined(ZSTD_ARCH_X86_SSE2) 1019 1060 1020 - return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, head); 1061 + return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, headGrouped); 1021 1062 1022 1063 #else /* SW or NEON-LE */ 1023 1064 1024 1065 # if defined(ZSTD_ARCH_ARM_NEON) 1025 1066 /* This NEON path only works for little endian - otherwise use SWAR below */ 1026 1067 if (MEM_isLittleEndian()) { 1027 - if (rowEntries == 16) { 1028 - const uint8x16_t chunk = vld1q_u8(src); 1029 - const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag))); 1030 - const uint16x8_t t0 = vshlq_n_u16(equalMask, 7); 1031 - const uint32x4_t t1 = vreinterpretq_u32_u16(vsriq_n_u16(t0, t0, 14)); 1032 - const uint64x2_t t2 = vreinterpretq_u64_u32(vshrq_n_u32(t1, 14)); 1033 - const uint8x16_t t3 = vreinterpretq_u8_u64(vsraq_n_u64(t2, t2, 28)); 1034 - const U16 hi = (U16)vgetq_lane_u8(t3, 8); 1035 - const U16 lo = (U16)vgetq_lane_u8(t3, 0); 1036 - return ZSTD_rotateRight_U16((hi << 8) | lo, head); 1037 - } else if (rowEntries == 32) { 1038 - const uint16x8x2_t chunk = vld2q_u16((const U16*)(const void*)src); 1039 - const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]); 1040 - const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]); 1041 - const uint8x16_t equalMask0 = vceqq_u8(chunk0, vdupq_n_u8(tag)); 1042 - const uint8x16_t equalMask1 = vceqq_u8(chunk1, 
vdupq_n_u8(tag)); 1043 - const int8x8_t pack0 = vqmovn_s16(vreinterpretq_s16_u8(equalMask0)); 1044 - const int8x8_t pack1 = vqmovn_s16(vreinterpretq_s16_u8(equalMask1)); 1045 - const uint8x8_t t0 = vreinterpret_u8_s8(pack0); 1046 - const uint8x8_t t1 = vreinterpret_u8_s8(pack1); 1047 - const uint8x8_t t2 = vsri_n_u8(t1, t0, 2); 1048 - const uint8x8x2_t t3 = vuzp_u8(t2, t0); 1049 - const uint8x8_t t4 = vsri_n_u8(t3.val[1], t3.val[0], 4); 1050 - const U32 matches = vget_lane_u32(vreinterpret_u32_u8(t4), 0); 1051 - return ZSTD_rotateRight_U32(matches, head); 1052 - } else { /* rowEntries == 64 */ 1053 - const uint8x16x4_t chunk = vld4q_u8(src); 1054 - const uint8x16_t dup = vdupq_n_u8(tag); 1055 - const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup); 1056 - const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup); 1057 - const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup); 1058 - const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup); 1059 - 1060 - const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1); 1061 - const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1); 1062 - const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2); 1063 - const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4); 1064 - const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4); 1065 - const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0); 1066 - return ZSTD_rotateRight_U64(matches, head); 1067 - } 1068 + return ZSTD_row_getNEONMask(rowEntries, src, tag, headGrouped); 1068 1069 } 1069 1070 # endif /* ZSTD_ARCH_ARM_NEON */ 1070 1071 /* SWAR */ 1071 - { const size_t chunkSize = sizeof(size_t); 1072 + { const int chunkSize = sizeof(size_t); 1072 1073 const size_t shiftAmount = ((chunkSize * 8) - chunkSize); 1073 1074 const size_t xFF = ~((size_t)0); 1074 1075 const size_t x01 = xFF / 0xFF; ··· 1112 1091 } 1113 1092 matches = ~matches; 1114 1093 if (rowEntries == 16) { 1115 - return ZSTD_rotateRight_U16((U16)matches, head); 1094 + return ZSTD_rotateRight_U16((U16)matches, headGrouped); 1116 1095 } else if (rowEntries == 32) { 
1117 - return ZSTD_rotateRight_U32((U32)matches, head); 1096 + return ZSTD_rotateRight_U32((U32)matches, headGrouped); 1118 1097 } else { 1119 - return ZSTD_rotateRight_U64((U64)matches, head); 1098 + return ZSTD_rotateRight_U64((U64)matches, headGrouped); 1120 1099 } 1121 1100 } 1122 1101 #endif ··· 1124 1103 1125 1104 /* The high-level approach of the SIMD row based match finder is as follows: 1126 1105 * - Figure out where to insert the new entry: 1127 - * - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag" 1128 - * - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines 1106 + * - Generate a hash for current input position and split it into a one byte of tag and `rowHashLog` bits of index. 1107 + * - The hash is salted by a value that changes on every context reset, so when the same table is used 1108 + * we will avoid collisions that would otherwise slow us down by introducing phantom matches. 1109 + * - The hashTable is effectively split into groups or "rows" of 15 or 31 entries of U32, and the index determines 1129 1110 * which row to insert into. 1130 - * - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can 1131 - * be considered as a circular buffer with a "head" index that resides in the tagTable. 1132 - * - Also insert the "tag" into the equivalent row and position in the tagTable. 1133 - * - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry. 1134 - * The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively, 1135 - * for alignment/performance reasons, leaving some bytes unused. 1136 - * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and 1111 + * - Determine the correct position within the row to insert the entry into. 
Each row of 15 or 31 can 1112 + * be considered as a circular buffer with a "head" index that resides in the tagTable (overall 16 or 32 bytes 1113 + * per row). 1114 + * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte tag calculated for the position and 1137 1115 * generate a bitfield that we can cycle through to check the collisions in the hash table. 1138 1116 * - Pick the longest match. 1117 + * - Insert the tag into the equivalent row and position in the tagTable. 1139 1118 */ 1140 1119 FORCE_INLINE_TEMPLATE 1120 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 1141 1121 size_t ZSTD_RowFindBestMatch( 1142 - ZSTD_matchState_t* ms, 1122 + ZSTD_MatchState_t* ms, 1143 1123 const BYTE* const ip, const BYTE* const iLimit, 1144 1124 size_t* offsetPtr, 1145 1125 const U32 mls, const ZSTD_dictMode_e dictMode, 1146 1126 const U32 rowLog) 1147 1127 { 1148 1128 U32* const hashTable = ms->hashTable; 1149 - U16* const tagTable = ms->tagTable; 1129 + BYTE* const tagTable = ms->tagTable; 1150 1130 U32* const hashCache = ms->hashCache; 1151 1131 const U32 hashLog = ms->rowHashLog; 1152 1132 const ZSTD_compressionParameters* const cParams = &ms->cParams; ··· 1165 1143 const U32 rowEntries = (1U << rowLog); 1166 1144 const U32 rowMask = rowEntries - 1; 1167 1145 const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */ 1146 + const U32 groupWidth = ZSTD_row_matchMaskGroupWidth(rowEntries); 1147 + const U64 hashSalt = ms->hashSalt; 1168 1148 U32 nbAttempts = 1U << cappedSearchLog; 1169 1149 size_t ml=4-1; 1150 + U32 hash; 1170 1151 1171 1152 /* DMS/DDS variables that may be referenced laster */ 1172 - const ZSTD_matchState_t* const dms = ms->dictMatchState; 1153 + const ZSTD_MatchState_t* const dms = ms->dictMatchState; 1173 1154 1174 1155 /* Initialize the following variables to satisfy static analyzer */ 1175 1156 size_t ddsIdx = 0; ··· 1193 1168 if (dictMode == ZSTD_dictMatchState) { 1194 1169 /* Prefetch DMS 
rows */ 1195 1170 U32* const dmsHashTable = dms->hashTable; 1196 - U16* const dmsTagTable = dms->tagTable; 1171 + BYTE* const dmsTagTable = dms->tagTable; 1197 1172 U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls); 1198 1173 U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; 1199 1174 dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK; ··· 1203 1178 } 1204 1179 1205 1180 /* Update the hashTable and tagTable up to (but not including) ip */ 1206 - ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */); 1181 + if (!ms->lazySkipping) { 1182 + ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */); 1183 + hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls, hashSalt); 1184 + } else { 1185 + /* Stop inserting every position when in the lazy skipping mode. 1186 + * The hash cache is also not kept up to date in this mode. 1187 + */ 1188 + hash = (U32)ZSTD_hashPtrSalted(ip, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt); 1189 + ms->nextToUpdate = curr; 1190 + } 1191 + ms->hashSaltEntropy += hash; /* collect salt entropy */ 1192 + 1207 1193 { /* Get the hash for ip, compute the appropriate row */ 1208 - U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls); 1209 1194 U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; 1210 1195 U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK; 1211 1196 U32* const row = hashTable + relRow; 1212 1197 BYTE* tagRow = (BYTE*)(tagTable + relRow); 1213 - U32 const head = *tagRow & rowMask; 1198 + U32 const headGrouped = (*tagRow & rowMask) * groupWidth; 1214 1199 U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES]; 1215 1200 size_t numMatches = 0; 1216 1201 size_t currMatch = 0; 1217 - ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, head, rowEntries); 1202 + ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, headGrouped, rowEntries); 1218 
1203 1219 1204 /* Cycle through the matches and prefetch */ 1220 - for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { 1221 - U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; 1205 + for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) { 1206 + U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask; 1222 1207 U32 const matchIndex = row[matchPos]; 1208 + if(matchPos == 0) continue; 1223 1209 assert(numMatches < rowEntries); 1224 1210 if (matchIndex < lowLimit) 1225 1211 break; ··· 1240 1204 PREFETCH_L1(dictBase + matchIndex); 1241 1205 } 1242 1206 matchBuffer[numMatches++] = matchIndex; 1207 + --nbAttempts; 1243 1208 } 1244 1209 1245 1210 /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop 1246 1211 in ZSTD_row_update_internal() at the next search. */ 1247 1212 { 1248 1213 U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask); 1249 - tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag; 1214 + tagRow[pos] = (BYTE)tag; 1250 1215 row[pos] = ms->nextToUpdate++; 1251 1216 } 1252 1217 ··· 1261 1224 if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { 1262 1225 const BYTE* const match = base + matchIndex; 1263 1226 assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ 1264 - if (match[ml] == ip[ml]) /* potentially better */ 1227 + /* read 4B starting from (match + ml + 1 - sizeof(U32)) */ 1228 + if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3)) /* potentially better */ 1265 1229 currentMl = ZSTD_count(ip, match, iLimit); 1266 1230 } else { 1267 1231 const BYTE* const match = dictBase + matchIndex; ··· 1274 1236 /* Save best solution */ 1275 1237 if (currentMl > ml) { 1276 1238 ml = currentMl; 1277 - *offsetPtr = STORE_OFFSET(curr - matchIndex); 1239 + *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex); 1278 1240 if (ip+currentMl == iLimit) break; /* best possible, avoids read 
overflow on next attempt */ 1279 1241 } 1280 1242 } ··· 1292 1254 const U32 dmsSize = (U32)(dmsEnd - dmsBase); 1293 1255 const U32 dmsIndexDelta = dictLimit - dmsSize; 1294 1256 1295 - { U32 const head = *dmsTagRow & rowMask; 1257 + { U32 const headGrouped = (*dmsTagRow & rowMask) * groupWidth; 1296 1258 U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES]; 1297 1259 size_t numMatches = 0; 1298 1260 size_t currMatch = 0; 1299 - ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries); 1261 + ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, headGrouped, rowEntries); 1300 1262 1301 - for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { 1302 - U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; 1263 + for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) { 1264 + U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask; 1303 1265 U32 const matchIndex = dmsRow[matchPos]; 1266 + if(matchPos == 0) continue; 1304 1267 if (matchIndex < dmsLowestIndex) 1305 1268 break; 1306 1269 PREFETCH_L1(dmsBase + matchIndex); 1307 1270 matchBuffer[numMatches++] = matchIndex; 1271 + --nbAttempts; 1308 1272 } 1309 1273 1310 1274 /* Return the longest match */ ··· 1325 1285 if (currentMl > ml) { 1326 1286 ml = currentMl; 1327 1287 assert(curr > matchIndex + dmsIndexDelta); 1328 - *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta)); 1288 + *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta)); 1329 1289 if (ip+currentMl == iLimit) break; 1330 1290 } 1331 1291 } ··· 1341 1301 * ZSTD_searchMax() dispatches to the correct implementation function. 1342 1302 * 1343 1303 * TODO: The start of the search function involves loading and calculating a 1344 - * bunch of constants from the ZSTD_matchState_t. These computations could be 1304 + * bunch of constants from the ZSTD_MatchState_t. 
These computations could be 1345 1305 * done in an initialization function, and saved somewhere in the match state. 1346 1306 * Then we could pass a pointer to the saved state instead of the match state, 1347 1307 * and avoid duplicate computations. ··· 1365 1325 1366 1326 #define GEN_ZSTD_BT_SEARCH_FN(dictMode, mls) \ 1367 1327 ZSTD_SEARCH_FN_ATTRS size_t ZSTD_BT_SEARCH_FN(dictMode, mls)( \ 1368 - ZSTD_matchState_t* ms, \ 1328 + ZSTD_MatchState_t* ms, \ 1369 1329 const BYTE* ip, const BYTE* const iLimit, \ 1370 1330 size_t* offBasePtr) \ 1371 1331 { \ ··· 1375 1335 1376 1336 #define GEN_ZSTD_HC_SEARCH_FN(dictMode, mls) \ 1377 1337 ZSTD_SEARCH_FN_ATTRS size_t ZSTD_HC_SEARCH_FN(dictMode, mls)( \ 1378 - ZSTD_matchState_t* ms, \ 1338 + ZSTD_MatchState_t* ms, \ 1379 1339 const BYTE* ip, const BYTE* const iLimit, \ 1380 1340 size_t* offsetPtr) \ 1381 1341 { \ ··· 1385 1345 1386 1346 #define GEN_ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) \ 1387 1347 ZSTD_SEARCH_FN_ATTRS size_t ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)( \ 1388 - ZSTD_matchState_t* ms, \ 1348 + ZSTD_MatchState_t* ms, \ 1389 1349 const BYTE* ip, const BYTE* const iLimit, \ 1390 1350 size_t* offsetPtr) \ 1391 1351 { \ ··· 1486 1446 * If a match is found its offset is stored in @p offsetPtr. 
1487 1447 */ 1488 1448 FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax( 1489 - ZSTD_matchState_t* ms, 1449 + ZSTD_MatchState_t* ms, 1490 1450 const BYTE* ip, 1491 1451 const BYTE* iend, 1492 1452 size_t* offsetPtr, ··· 1512 1472 * Common parser - lazy strategy 1513 1473 *********************************/ 1514 1474 1515 - FORCE_INLINE_TEMPLATE size_t 1516 - ZSTD_compressBlock_lazy_generic( 1517 - ZSTD_matchState_t* ms, seqStore_t* seqStore, 1475 + FORCE_INLINE_TEMPLATE 1476 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 1477 + size_t ZSTD_compressBlock_lazy_generic( 1478 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, 1518 1479 U32 rep[ZSTD_REP_NUM], 1519 1480 const void* src, size_t srcSize, 1520 1481 const searchMethod_e searchMethod, const U32 depth, ··· 1532 1491 const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6); 1533 1492 const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6); 1534 1493 1535 - U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; 1494 + U32 offset_1 = rep[0], offset_2 = rep[1]; 1495 + U32 offsetSaved1 = 0, offsetSaved2 = 0; 1536 1496 1537 1497 const int isDMS = dictMode == ZSTD_dictMatchState; 1538 1498 const int isDDS = dictMode == ZSTD_dedicatedDictSearch; 1539 1499 const int isDxS = isDMS || isDDS; 1540 - const ZSTD_matchState_t* const dms = ms->dictMatchState; 1500 + const ZSTD_MatchState_t* const dms = ms->dictMatchState; 1541 1501 const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0; 1542 1502 const BYTE* const dictBase = isDxS ? dms->window.base : NULL; 1543 1503 const BYTE* const dictLowest = isDxS ? 
dictBase + dictLowestIndex : NULL; ··· 1554 1512 U32 const curr = (U32)(ip - base); 1555 1513 U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog); 1556 1514 U32 const maxRep = curr - windowLow; 1557 - if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; 1558 - if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; 1515 + if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0; 1516 + if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0; 1559 1517 } 1560 1518 if (isDxS) { 1561 1519 /* dictMatchState repCode checks don't currently handle repCode == 0 ··· 1564 1522 assert(offset_2 <= dictAndPrefixLength); 1565 1523 } 1566 1524 1525 + /* Reset the lazy skipping state */ 1526 + ms->lazySkipping = 0; 1527 + 1567 1528 if (searchMethod == search_rowHash) { 1568 - ZSTD_row_fillHashCache(ms, base, rowLog, 1569 - MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */), 1570 - ms->nextToUpdate, ilimit); 1529 + ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit); 1571 1530 } 1572 1531 1573 1532 /* Match Loop */ ··· 1580 1537 #endif 1581 1538 while (ip < ilimit) { 1582 1539 size_t matchLength=0; 1583 - size_t offcode=STORE_REPCODE_1; 1540 + size_t offBase = REPCODE1_TO_OFFBASE; 1584 1541 const BYTE* start=ip+1; 1585 1542 DEBUGLOG(7, "search baseline (depth 0)"); 1586 1543 ··· 1591 1548 && repIndex < prefixLowestIndex) ? 1592 1549 dictBase + (repIndex - dictIndexDelta) : 1593 1550 base + repIndex; 1594 - if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) 1551 + if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex)) 1595 1552 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { 1596 1553 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; 1597 1554 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; ··· 1605 1562 } 1606 1563 1607 1564 /* first search (depth 0) */ 1608 - { size_t offsetFound = 999999999; 1609 - size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offsetFound, mls, rowLog, searchMethod, dictMode); 1565 + { size_t offbaseFound = 999999999; 1566 + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offbaseFound, mls, rowLog, searchMethod, dictMode); 1610 1567 if (ml2 > matchLength) 1611 - matchLength = ml2, start = ip, offcode=offsetFound; 1568 + matchLength = ml2, start = ip, offBase = offbaseFound; 1612 1569 } 1613 1570 1614 1571 if (matchLength < 4) { 1615 - ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ 1572 + size_t const step = ((size_t)(ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */; 1573 + ip += step; 1574 + /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time. 1575 + * In this mode we stop inserting every position into our tables, and only insert 1576 + * positions that we search, which is one in step positions. 1577 + * The exact cutoff is flexible, I've just chosen a number that is reasonably high, 1578 + * so we minimize the compression ratio loss in "normal" scenarios. This mode gets 1579 + * triggered once we've gone 2KB without finding any matches. 
1580 + */ 1581 + ms->lazySkipping = step > kLazySkippingStep; 1616 1582 continue; 1617 1583 } 1618 1584 ··· 1631 1579 DEBUGLOG(7, "search depth 1"); 1632 1580 ip ++; 1633 1581 if ( (dictMode == ZSTD_noDict) 1634 - && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { 1582 + && (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { 1635 1583 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; 1636 1584 int const gain2 = (int)(mlRep * 3); 1637 - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); 1585 + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1); 1638 1586 if ((mlRep >= 4) && (gain2 > gain1)) 1639 - matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; 1587 + matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip; 1640 1588 } 1641 1589 if (isDxS) { 1642 1590 const U32 repIndex = (U32)(ip - base) - offset_1; 1643 1591 const BYTE* repMatch = repIndex < prefixLowestIndex ? 1644 1592 dictBase + (repIndex - dictIndexDelta) : 1645 1593 base + repIndex; 1646 - if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) 1594 + if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex)) 1647 1595 && (MEM_read32(repMatch) == MEM_read32(ip)) ) { 1648 1596 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; 1649 1597 size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; 1650 1598 int const gain2 = (int)(mlRep * 3); 1651 - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); 1599 + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1); 1652 1600 if ((mlRep >= 4) && (gain2 > gain1)) 1653 - matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; 1601 + matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip; 1654 1602 } 1655 1603 } 1656 - { size_t offset2=999999999; 1657 - size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, dictMode); 1658 - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */ 1659 - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4); 1604 + { size_t ofbCandidate=999999999; 1605 + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode); 1606 + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ 1607 + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4); 1660 1608 if ((ml2 >= 4) && (gain2 > gain1)) { 1661 - matchLength = ml2, offcode = offset2, start = ip; 1609 + matchLength = ml2, offBase = ofbCandidate, start = ip; 1662 1610 continue; /* search a better one */ 1663 1611 } } 1664 1612 ··· 1667 1615 DEBUGLOG(7, "search depth 2"); 1668 1616 ip ++; 1669 1617 if ( (dictMode == ZSTD_noDict) 1670 - && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { 1618 + && (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { 1671 1619 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; 1672 1620 int const gain2 = (int)(mlRep * 4); 1673 - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); 1621 + int const gain1 = 
(int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1); 1674 1622 if ((mlRep >= 4) && (gain2 > gain1)) 1675 - matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; 1623 + matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip; 1676 1624 } 1677 1625 if (isDxS) { 1678 1626 const U32 repIndex = (U32)(ip - base) - offset_1; 1679 1627 const BYTE* repMatch = repIndex < prefixLowestIndex ? 1680 1628 dictBase + (repIndex - dictIndexDelta) : 1681 1629 base + repIndex; 1682 - if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) 1630 + if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex)) 1683 1631 && (MEM_read32(repMatch) == MEM_read32(ip)) ) { 1684 1632 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; 1685 1633 size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; 1686 1634 int const gain2 = (int)(mlRep * 4); 1687 - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); 1635 + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1); 1688 1636 if ((mlRep >= 4) && (gain2 > gain1)) 1689 - matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip; 1637 + matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip; 1690 1638 } 1691 1639 } 1692 - { size_t offset2=999999999; 1693 - size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, dictMode); 1694 - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */ 1695 - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7); 1640 + { size_t ofbCandidate=999999999; 1641 + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode); 1642 + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ 1643 + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7); 1696 1644 if 
((ml2 >= 4) && (gain2 > gain1)) { 1697 - matchLength = ml2, offcode = offset2, start = ip; 1645 + matchLength = ml2, offBase = ofbCandidate, start = ip; 1698 1646 continue; 1699 1647 } } } 1700 1648 break; /* nothing found : store previous solution */ ··· 1705 1653 * notably if `value` is unsigned, resulting in a large positive `-value`. 1706 1654 */ 1707 1655 /* catch up */ 1708 - if (STORED_IS_OFFSET(offcode)) { 1656 + if (OFFBASE_IS_OFFSET(offBase)) { 1709 1657 if (dictMode == ZSTD_noDict) { 1710 - while ( ((start > anchor) & (start - STORED_OFFSET(offcode) > prefixLowest)) 1711 - && (start[-1] == (start-STORED_OFFSET(offcode))[-1]) ) /* only search for offset within prefix */ 1658 + while ( ((start > anchor) & (start - OFFBASE_TO_OFFSET(offBase) > prefixLowest)) 1659 + && (start[-1] == (start-OFFBASE_TO_OFFSET(offBase))[-1]) ) /* only search for offset within prefix */ 1712 1660 { start--; matchLength++; } 1713 1661 } 1714 1662 if (isDxS) { 1715 - U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode)); 1663 + U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase)); 1716 1664 const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; 1717 1665 const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? 
dictLowest : prefixLowest; 1718 1666 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ 1719 1667 } 1720 - offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode); 1668 + offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase); 1721 1669 } 1722 1670 /* store sequence */ 1723 1671 _storeSequence: 1724 1672 { size_t const litLength = (size_t)(start - anchor); 1725 - ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength); 1673 + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength); 1726 1674 anchor = ip = start + matchLength; 1675 + } 1676 + if (ms->lazySkipping) { 1677 + /* We've found a match, disable lazy skipping mode, and refill the hash cache. */ 1678 + if (searchMethod == search_rowHash) { 1679 + ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit); 1680 + } 1681 + ms->lazySkipping = 0; 1727 1682 } 1728 1683 1729 1684 /* check immediate repcode */ ··· 1741 1682 const BYTE* repMatch = repIndex < prefixLowestIndex ? 1742 1683 dictBase - dictIndexDelta + repIndex : 1743 1684 base + repIndex; 1744 - if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */) 1685 + if ( (ZSTD_index_overlap_check(prefixLowestIndex, repIndex)) 1745 1686 && (MEM_read32(repMatch) == MEM_read32(ip)) ) { 1746 1687 const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? 
dictEnd : iend; 1747 1688 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; 1748 - offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap offset_2 <=> offset_1 */ 1749 - ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength); 1689 + offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap offset_2 <=> offset_1 */ 1690 + ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength); 1750 1691 ip += matchLength; 1751 1692 anchor = ip; 1752 1693 continue; ··· 1760 1701 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { 1761 1702 /* store sequence */ 1762 1703 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; 1763 - offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap repcodes */ 1764 - ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength); 1704 + offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap repcodes */ 1705 + ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength); 1765 1706 ip += matchLength; 1766 1707 anchor = ip; 1767 1708 continue; /* faster when present ... (?) */ 1768 1709 } } } 1769 1710 1770 - /* Save reps for next block */ 1771 - rep[0] = offset_1 ? offset_1 : savedOffset; 1772 - rep[1] = offset_2 ? offset_2 : savedOffset; 1711 + /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0), 1712 + * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */ 1713 + offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2; 1714 + 1715 + /* save reps for next block */ 1716 + rep[0] = offset_1 ? offset_1 : offsetSaved1; 1717 + rep[1] = offset_2 ? 
offset_2 : offsetSaved2; 1773 1718 1774 1719 /* Return the last literals size */ 1775 1720 return (size_t)(iend - anchor); 1776 1721 } 1722 + #endif /* build exclusions */ 1777 1723 1778 1724 1779 - size_t ZSTD_compressBlock_btlazy2( 1780 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1781 - void const* src, size_t srcSize) 1782 - { 1783 - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); 1784 - } 1785 - 1786 - size_t ZSTD_compressBlock_lazy2( 1787 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1788 - void const* src, size_t srcSize) 1789 - { 1790 - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); 1791 - } 1792 - 1793 - size_t ZSTD_compressBlock_lazy( 1794 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1795 - void const* src, size_t srcSize) 1796 - { 1797 - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); 1798 - } 1799 - 1725 + #ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR 1800 1726 size_t ZSTD_compressBlock_greedy( 1801 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1727 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1802 1728 void const* src, size_t srcSize) 1803 1729 { 1804 1730 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); 1805 1731 } 1806 1732 1807 - size_t ZSTD_compressBlock_btlazy2_dictMatchState( 1808 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1809 - void const* src, size_t srcSize) 1810 - { 1811 - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); 1812 - } 1813 - 1814 - size_t ZSTD_compressBlock_lazy2_dictMatchState( 1815 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1816 - void const* src, size_t 
srcSize) 1817 - { 1818 - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); 1819 - } 1820 - 1821 - size_t ZSTD_compressBlock_lazy_dictMatchState( 1822 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1823 - void const* src, size_t srcSize) 1824 - { 1825 - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); 1826 - } 1827 - 1828 1733 size_t ZSTD_compressBlock_greedy_dictMatchState( 1829 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1734 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1830 1735 void const* src, size_t srcSize) 1831 1736 { 1832 1737 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); 1833 1738 } 1834 1739 1835 - 1836 - size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( 1837 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1838 - void const* src, size_t srcSize) 1839 - { 1840 - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch); 1841 - } 1842 - 1843 - size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( 1844 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1845 - void const* src, size_t srcSize) 1846 - { 1847 - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch); 1848 - } 1849 - 1850 1740 size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( 1851 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1741 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1852 1742 void const* src, size_t srcSize) 1853 1743 { 1854 1744 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); 1855 1745 } 1856 1746 1857 - /* Row-based matchfinder */ 
1858 - size_t ZSTD_compressBlock_lazy2_row( 1859 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1860 - void const* src, size_t srcSize) 1861 - { 1862 - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict); 1863 - } 1864 - 1865 - size_t ZSTD_compressBlock_lazy_row( 1866 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1867 - void const* src, size_t srcSize) 1868 - { 1869 - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict); 1870 - } 1871 - 1872 1747 size_t ZSTD_compressBlock_greedy_row( 1873 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1748 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1874 1749 void const* src, size_t srcSize) 1875 1750 { 1876 1751 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict); 1877 1752 } 1878 1753 1879 - size_t ZSTD_compressBlock_lazy2_dictMatchState_row( 1880 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1881 - void const* src, size_t srcSize) 1882 - { 1883 - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState); 1884 - } 1885 - 1886 - size_t ZSTD_compressBlock_lazy_dictMatchState_row( 1887 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1888 - void const* src, size_t srcSize) 1889 - { 1890 - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState); 1891 - } 1892 - 1893 1754 size_t ZSTD_compressBlock_greedy_dictMatchState_row( 1894 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1755 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1895 1756 void const* src, size_t srcSize) 1896 1757 { 1897 1758 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, 
ZSTD_dictMatchState); 1898 1759 } 1899 1760 1900 - 1901 - size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( 1902 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1903 - void const* src, size_t srcSize) 1904 - { 1905 - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch); 1906 - } 1907 - 1908 - size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( 1909 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1910 - void const* src, size_t srcSize) 1911 - { 1912 - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch); 1913 - } 1914 - 1915 1761 size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( 1916 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1762 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1917 1763 void const* src, size_t srcSize) 1918 1764 { 1919 1765 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch); 1920 1766 } 1767 + #endif 1921 1768 1769 + #ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR 1770 + size_t ZSTD_compressBlock_lazy( 1771 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1772 + void const* src, size_t srcSize) 1773 + { 1774 + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); 1775 + } 1776 + 1777 + size_t ZSTD_compressBlock_lazy_dictMatchState( 1778 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1779 + void const* src, size_t srcSize) 1780 + { 1781 + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); 1782 + } 1783 + 1784 + size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( 1785 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1786 + void const* src, size_t srcSize) 1787 + { 1788 + 
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch); 1789 + } 1790 + 1791 + size_t ZSTD_compressBlock_lazy_row( 1792 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1793 + void const* src, size_t srcSize) 1794 + { 1795 + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict); 1796 + } 1797 + 1798 + size_t ZSTD_compressBlock_lazy_dictMatchState_row( 1799 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1800 + void const* src, size_t srcSize) 1801 + { 1802 + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState); 1803 + } 1804 + 1805 + size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( 1806 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1807 + void const* src, size_t srcSize) 1808 + { 1809 + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch); 1810 + } 1811 + #endif 1812 + 1813 + #ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR 1814 + size_t ZSTD_compressBlock_lazy2( 1815 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1816 + void const* src, size_t srcSize) 1817 + { 1818 + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); 1819 + } 1820 + 1821 + size_t ZSTD_compressBlock_lazy2_dictMatchState( 1822 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1823 + void const* src, size_t srcSize) 1824 + { 1825 + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); 1826 + } 1827 + 1828 + size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( 1829 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1830 + void const* src, size_t srcSize) 1831 + { 1832 + return ZSTD_compressBlock_lazy_generic(ms, seqStore, 
rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch); 1833 + } 1834 + 1835 + size_t ZSTD_compressBlock_lazy2_row( 1836 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1837 + void const* src, size_t srcSize) 1838 + { 1839 + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict); 1840 + } 1841 + 1842 + size_t ZSTD_compressBlock_lazy2_dictMatchState_row( 1843 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1844 + void const* src, size_t srcSize) 1845 + { 1846 + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState); 1847 + } 1848 + 1849 + size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( 1850 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1851 + void const* src, size_t srcSize) 1852 + { 1853 + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch); 1854 + } 1855 + #endif 1856 + 1857 + #ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR 1858 + size_t ZSTD_compressBlock_btlazy2( 1859 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1860 + void const* src, size_t srcSize) 1861 + { 1862 + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); 1863 + } 1864 + 1865 + size_t ZSTD_compressBlock_btlazy2_dictMatchState( 1866 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1867 + void const* src, size_t srcSize) 1868 + { 1869 + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); 1870 + } 1871 + #endif 1872 + 1873 + #if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \ 1874 + || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \ 1875 + || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \ 1876 + || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) 1922 1877 FORCE_INLINE_TEMPLATE 1878 + 
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 1923 1879 size_t ZSTD_compressBlock_lazy_extDict_generic( 1924 - ZSTD_matchState_t* ms, seqStore_t* seqStore, 1880 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, 1925 1881 U32 rep[ZSTD_REP_NUM], 1926 1882 const void* src, size_t srcSize, 1927 1883 const searchMethod_e searchMethod, const U32 depth) ··· 1960 1886 1961 1887 DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod); 1962 1888 1889 + /* Reset the lazy skipping state */ 1890 + ms->lazySkipping = 0; 1891 + 1963 1892 /* init */ 1964 1893 ip += (ip == prefixStart); 1965 1894 if (searchMethod == search_rowHash) { 1966 - ZSTD_row_fillHashCache(ms, base, rowLog, 1967 - MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */), 1968 - ms->nextToUpdate, ilimit); 1895 + ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit); 1969 1896 } 1970 1897 1971 1898 /* Match Loop */ ··· 1978 1903 #endif 1979 1904 while (ip < ilimit) { 1980 1905 size_t matchLength=0; 1981 - size_t offcode=STORE_REPCODE_1; 1906 + size_t offBase = REPCODE1_TO_OFFBASE; 1982 1907 const BYTE* start=ip+1; 1983 1908 U32 curr = (U32)(ip-base); 1984 1909 ··· 1987 1912 const U32 repIndex = (U32)(curr+1 - offset_1); 1988 1913 const BYTE* const repBase = repIndex < dictLimit ? 
dictBase : base; 1989 1914 const BYTE* const repMatch = repBase + repIndex; 1990 - if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ 1915 + if ( (ZSTD_index_overlap_check(dictLimit, repIndex)) 1991 1916 & (offset_1 <= curr+1 - windowLow) ) /* note: we are searching at curr+1 */ 1992 1917 if (MEM_read32(ip+1) == MEM_read32(repMatch)) { 1993 1918 /* repcode detected we should take it */ ··· 1997 1922 } } 1998 1923 1999 1924 /* first search (depth 0) */ 2000 - { size_t offsetFound = 999999999; 2001 - size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offsetFound, mls, rowLog, searchMethod, ZSTD_extDict); 1925 + { size_t ofbCandidate = 999999999; 1926 + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict); 2002 1927 if (ml2 > matchLength) 2003 - matchLength = ml2, start = ip, offcode=offsetFound; 1928 + matchLength = ml2, start = ip, offBase = ofbCandidate; 2004 1929 } 2005 1930 2006 1931 if (matchLength < 4) { 2007 - ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ 1932 + size_t const step = ((size_t)(ip-anchor) >> kSearchStrength); 1933 + ip += step + 1; /* jump faster over incompressible sections */ 1934 + /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time. 1935 + * In this mode we stop inserting every position into our tables, and only insert 1936 + * positions that we search, which is one in step positions. 1937 + * The exact cutoff is flexible, I've just chosen a number that is reasonably high, 1938 + * so we minimize the compression ratio loss in "normal" scenarios. This mode gets 1939 + * triggered once we've gone 2KB without finding any matches. 
1940 + */ 1941 + ms->lazySkipping = step > kLazySkippingStep; 2008 1942 continue; 2009 1943 } 2010 1944 ··· 2023 1939 ip ++; 2024 1940 curr++; 2025 1941 /* check repCode */ 2026 - if (offcode) { 1942 + if (offBase) { 2027 1943 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog); 2028 1944 const U32 repIndex = (U32)(curr - offset_1); 2029 1945 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; 2030 1946 const BYTE* const repMatch = repBase + repIndex; 2031 - if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ 1947 + if ( (ZSTD_index_overlap_check(dictLimit, repIndex)) 2032 1948 & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ 2033 1949 if (MEM_read32(ip) == MEM_read32(repMatch)) { 2034 1950 /* repcode detected */ 2035 1951 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; 2036 1952 size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; 2037 1953 int const gain2 = (int)(repLength * 3); 2038 - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); 1954 + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1); 2039 1955 if ((repLength >= 4) && (gain2 > gain1)) 2040 - matchLength = repLength, offcode = STORE_REPCODE_1, start = ip; 1956 + matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip; 2041 1957 } } 2042 1958 2043 1959 /* search match, depth 1 */ 2044 - { size_t offset2=999999999; 2045 - size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, ZSTD_extDict); 2046 - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */ 2047 - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4); 1960 + { size_t ofbCandidate = 999999999; 1961 + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, 
mls, rowLog, searchMethod, ZSTD_extDict); 1962 + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ 1963 + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4); 2048 1964 if ((ml2 >= 4) && (gain2 > gain1)) { 2049 - matchLength = ml2, offcode = offset2, start = ip; 1965 + matchLength = ml2, offBase = ofbCandidate, start = ip; 2050 1966 continue; /* search a better one */ 2051 1967 } } 2052 1968 ··· 2055 1971 ip ++; 2056 1972 curr++; 2057 1973 /* check repCode */ 2058 - if (offcode) { 1974 + if (offBase) { 2059 1975 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog); 2060 1976 const U32 repIndex = (U32)(curr - offset_1); 2061 1977 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; 2062 1978 const BYTE* const repMatch = repBase + repIndex; 2063 - if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ 1979 + if ( (ZSTD_index_overlap_check(dictLimit, repIndex)) 2064 1980 & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ 2065 1981 if (MEM_read32(ip) == MEM_read32(repMatch)) { 2066 1982 /* repcode detected */ 2067 1983 const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; 2068 1984 size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; 2069 1985 int const gain2 = (int)(repLength * 4); 2070 - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1); 1986 + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1); 2071 1987 if ((repLength >= 4) && (gain2 > gain1)) 2072 - matchLength = repLength, offcode = STORE_REPCODE_1, start = ip; 1988 + matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip; 2073 1989 } } 2074 1990 2075 1991 /* search match, depth 2 */ 2076 - { size_t offset2=999999999; 2077 - size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, ZSTD_extDict); 2078 - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */ 2079 - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7); 1992 + { size_t ofbCandidate = 999999999; 1993 + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict); 1994 + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ 1995 + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7); 2080 1996 if ((ml2 >= 4) && (gain2 > gain1)) { 2081 - matchLength = ml2, offcode = offset2, start = ip; 1997 + matchLength = ml2, offBase = ofbCandidate, start = ip; 2082 1998 continue; 2083 1999 } } } 2084 2000 break; /* nothing found : store previous solution */ 2085 2001 } 2086 2002 2087 2003 /* catch up */ 2088 - if (STORED_IS_OFFSET(offcode)) { 2089 - U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode)); 2004 + if (OFFBASE_IS_OFFSET(offBase)) { 2005 + U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase)); 2090 2006 const BYTE* match = (matchIndex < dictLimit) ? 
dictBase + matchIndex : base + matchIndex; 2091 2007 const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; 2092 2008 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ 2093 - offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode); 2009 + offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase); 2094 2010 } 2095 2011 2096 2012 /* store sequence */ 2097 2013 _storeSequence: 2098 2014 { size_t const litLength = (size_t)(start - anchor); 2099 - ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength); 2015 + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength); 2100 2016 anchor = ip = start + matchLength; 2017 + } 2018 + if (ms->lazySkipping) { 2019 + /* We've found a match, disable lazy skipping mode, and refill the hash cache. */ 2020 + if (searchMethod == search_rowHash) { 2021 + ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit); 2022 + } 2023 + ms->lazySkipping = 0; 2101 2024 } 2102 2025 2103 2026 /* check immediate repcode */ ··· 2114 2023 const U32 repIndex = repCurrent - offset_2; 2115 2024 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; 2116 2025 const BYTE* const repMatch = repBase + repIndex; 2117 - if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ 2026 + if ( (ZSTD_index_overlap_check(dictLimit, repIndex)) 2118 2027 & (offset_2 <= repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ 2119 2028 if (MEM_read32(ip) == MEM_read32(repMatch)) { 2120 2029 /* repcode detected we should take it */ 2121 2030 const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; 2122 2031 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; 2123 - offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap offset history */ 2124 - ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength); 2032 + offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap offset history */ 2033 + ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength); 2125 2034 ip += matchLength; 2126 2035 anchor = ip; 2127 2036 continue; /* faster when present ... (?) */ ··· 2136 2045 /* Return the last literals size */ 2137 2046 return (size_t)(iend - anchor); 2138 2047 } 2048 + #endif /* build exclusions */ 2139 2049 2140 - 2050 + #ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR 2141 2051 size_t ZSTD_compressBlock_greedy_extDict( 2142 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 2052 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 2143 2053 void const* src, size_t srcSize) 2144 2054 { 2145 2055 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0); 2146 2056 } 2147 2057 2058 + size_t ZSTD_compressBlock_greedy_extDict_row( 2059 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 2060 + void const* src, size_t srcSize) 2061 + { 2062 + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0); 2063 + } 2064 + #endif 2065 + 2066 + #ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR 2148 2067 size_t ZSTD_compressBlock_lazy_extDict( 2149 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 2068 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 2150 2069 void const* src, size_t srcSize) 2151 2070 2152 2071 { 2153 2072 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); 2154 2073 } 2155 2074 2075 + size_t ZSTD_compressBlock_lazy_extDict_row( 2076 + 
ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 2077 + void const* src, size_t srcSize) 2078 + 2079 + { 2080 + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1); 2081 + } 2082 + #endif 2083 + 2084 + #ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR 2156 2085 size_t ZSTD_compressBlock_lazy2_extDict( 2157 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 2086 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 2158 2087 void const* src, size_t srcSize) 2159 2088 2160 2089 { 2161 2090 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); 2162 2091 } 2163 2092 2093 + size_t ZSTD_compressBlock_lazy2_extDict_row( 2094 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 2095 + void const* src, size_t srcSize) 2096 + { 2097 + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2); 2098 + } 2099 + #endif 2100 + 2101 + #ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR 2164 2102 size_t ZSTD_compressBlock_btlazy2_extDict( 2165 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 2103 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 2166 2104 void const* src, size_t srcSize) 2167 2105 2168 2106 { 2169 2107 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); 2170 2108 } 2171 - 2172 - size_t ZSTD_compressBlock_greedy_extDict_row( 2173 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 2174 - void const* src, size_t srcSize) 2175 - { 2176 - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0); 2177 - } 2178 - 2179 - size_t ZSTD_compressBlock_lazy_extDict_row( 2180 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 2181 - void const* src, size_t srcSize) 2182 - 2183 - { 2184 - return 
ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1); 2185 - } 2186 - 2187 - size_t ZSTD_compressBlock_lazy2_extDict_row( 2188 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 2189 - void const* src, size_t srcSize) 2190 - 2191 - { 2192 - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2); 2193 - } 2109 + #endif
+143 -68
lib/zstd/compress/zstd_lazy.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 12 11 #ifndef ZSTD_LAZY_H 13 12 #define ZSTD_LAZY_H 14 13 15 - 16 14 #include "zstd_compress_internal.h" 17 15 18 16 /* ··· 22 22 */ 23 23 #define ZSTD_LAZY_DDSS_BUCKET_LOG 2 24 24 25 - U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); 26 - void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip); 25 + #define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */ 27 26 28 - void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip); 27 + #if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \ 28 + || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \ 29 + || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \ 30 + || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) 31 + U32 ZSTD_insertAndFindFirstIndex(ZSTD_MatchState_t* ms, const BYTE* ip); 32 + void ZSTD_row_update(ZSTD_MatchState_t* const ms, const BYTE* ip); 33 + 34 + void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_MatchState_t* ms, const BYTE* const ip); 29 35 30 36 void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). 
preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ 37 + #endif 31 38 32 - size_t ZSTD_compressBlock_btlazy2( 33 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 34 - void const* src, size_t srcSize); 35 - size_t ZSTD_compressBlock_lazy2( 36 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 37 - void const* src, size_t srcSize); 38 - size_t ZSTD_compressBlock_lazy( 39 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 40 - void const* src, size_t srcSize); 39 + #ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR 41 40 size_t ZSTD_compressBlock_greedy( 42 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 43 - void const* src, size_t srcSize); 44 - size_t ZSTD_compressBlock_lazy2_row( 45 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 46 - void const* src, size_t srcSize); 47 - size_t ZSTD_compressBlock_lazy_row( 48 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 41 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 49 42 void const* src, size_t srcSize); 50 43 size_t ZSTD_compressBlock_greedy_row( 51 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 52 - void const* src, size_t srcSize); 53 - 54 - size_t ZSTD_compressBlock_btlazy2_dictMatchState( 55 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 56 - void const* src, size_t srcSize); 57 - size_t ZSTD_compressBlock_lazy2_dictMatchState( 58 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 59 - void const* src, size_t srcSize); 60 - size_t ZSTD_compressBlock_lazy_dictMatchState( 61 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 44 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 62 45 void const* src, size_t srcSize); 63 46 size_t ZSTD_compressBlock_greedy_dictMatchState( 64 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 65 - void const* src, size_t 
srcSize); 66 - size_t ZSTD_compressBlock_lazy2_dictMatchState_row( 67 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 68 - void const* src, size_t srcSize); 69 - size_t ZSTD_compressBlock_lazy_dictMatchState_row( 70 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 47 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 71 48 void const* src, size_t srcSize); 72 49 size_t ZSTD_compressBlock_greedy_dictMatchState_row( 73 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 74 - void const* src, size_t srcSize); 75 - 76 - size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( 77 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 78 - void const* src, size_t srcSize); 79 - size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( 80 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 50 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 81 51 void const* src, size_t srcSize); 82 52 size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( 83 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 84 - void const* src, size_t srcSize); 85 - size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( 86 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 87 - void const* src, size_t srcSize); 88 - size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( 89 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 53 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 90 54 void const* src, size_t srcSize); 91 55 size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( 92 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 56 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 93 57 void const* src, size_t srcSize); 94 - 95 58 size_t ZSTD_compressBlock_greedy_extDict( 96 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 97 - void const* 
src, size_t srcSize); 98 - size_t ZSTD_compressBlock_lazy_extDict( 99 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 100 - void const* src, size_t srcSize); 101 - size_t ZSTD_compressBlock_lazy2_extDict( 102 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 59 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 103 60 void const* src, size_t srcSize); 104 61 size_t ZSTD_compressBlock_greedy_extDict_row( 105 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 62 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 63 + void const* src, size_t srcSize); 64 + 65 + #define ZSTD_COMPRESSBLOCK_GREEDY ZSTD_compressBlock_greedy 66 + #define ZSTD_COMPRESSBLOCK_GREEDY_ROW ZSTD_compressBlock_greedy_row 67 + #define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE ZSTD_compressBlock_greedy_dictMatchState 68 + #define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW ZSTD_compressBlock_greedy_dictMatchState_row 69 + #define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH ZSTD_compressBlock_greedy_dedicatedDictSearch 70 + #define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_greedy_dedicatedDictSearch_row 71 + #define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT ZSTD_compressBlock_greedy_extDict 72 + #define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW ZSTD_compressBlock_greedy_extDict_row 73 + #else 74 + #define ZSTD_COMPRESSBLOCK_GREEDY NULL 75 + #define ZSTD_COMPRESSBLOCK_GREEDY_ROW NULL 76 + #define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE NULL 77 + #define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW NULL 78 + #define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH NULL 79 + #define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW NULL 80 + #define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT NULL 81 + #define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW NULL 82 + #endif 83 + 84 + #ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR 85 + size_t ZSTD_compressBlock_lazy( 86 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, 
U32 rep[ZSTD_REP_NUM], 87 + void const* src, size_t srcSize); 88 + size_t ZSTD_compressBlock_lazy_row( 89 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 90 + void const* src, size_t srcSize); 91 + size_t ZSTD_compressBlock_lazy_dictMatchState( 92 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 93 + void const* src, size_t srcSize); 94 + size_t ZSTD_compressBlock_lazy_dictMatchState_row( 95 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 96 + void const* src, size_t srcSize); 97 + size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( 98 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 99 + void const* src, size_t srcSize); 100 + size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( 101 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 102 + void const* src, size_t srcSize); 103 + size_t ZSTD_compressBlock_lazy_extDict( 104 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 106 105 void const* src, size_t srcSize); 107 106 size_t ZSTD_compressBlock_lazy_extDict_row( 108 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 107 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 108 + void const* src, size_t srcSize); 109 + 110 + #define ZSTD_COMPRESSBLOCK_LAZY ZSTD_compressBlock_lazy 111 + #define ZSTD_COMPRESSBLOCK_LAZY_ROW ZSTD_compressBlock_lazy_row 112 + #define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE ZSTD_compressBlock_lazy_dictMatchState 113 + #define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy_dictMatchState_row 114 + #define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy_dedicatedDictSearch 115 + #define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy_dedicatedDictSearch_row 116 + #define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT ZSTD_compressBlock_lazy_extDict 117 + #define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW ZSTD_compressBlock_lazy_extDict_row 
118 + #else 119 + #define ZSTD_COMPRESSBLOCK_LAZY NULL 120 + #define ZSTD_COMPRESSBLOCK_LAZY_ROW NULL 121 + #define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE NULL 122 + #define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW NULL 123 + #define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH NULL 124 + #define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW NULL 125 + #define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT NULL 126 + #define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW NULL 127 + #endif 128 + 129 + #ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR 130 + size_t ZSTD_compressBlock_lazy2( 131 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 132 + void const* src, size_t srcSize); 133 + size_t ZSTD_compressBlock_lazy2_row( 134 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 135 + void const* src, size_t srcSize); 136 + size_t ZSTD_compressBlock_lazy2_dictMatchState( 137 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 138 + void const* src, size_t srcSize); 139 + size_t ZSTD_compressBlock_lazy2_dictMatchState_row( 140 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 141 + void const* src, size_t srcSize); 142 + size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( 143 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 144 + void const* src, size_t srcSize); 145 + size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( 146 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 147 + void const* src, size_t srcSize); 148 + size_t ZSTD_compressBlock_lazy2_extDict( 149 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 109 150 void const* src, size_t srcSize); 110 151 size_t ZSTD_compressBlock_lazy2_extDict_row( 111 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 152 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 153 + void const* src, size_t srcSize); 154 + 155 + #define ZSTD_COMPRESSBLOCK_LAZY2 
ZSTD_compressBlock_lazy2 156 + #define ZSTD_COMPRESSBLOCK_LAZY2_ROW ZSTD_compressBlock_lazy2_row 157 + #define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE ZSTD_compressBlock_lazy2_dictMatchState 158 + #define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy2_dictMatchState_row 159 + #define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy2_dedicatedDictSearch 160 + #define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy2_dedicatedDictSearch_row 161 + #define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT ZSTD_compressBlock_lazy2_extDict 162 + #define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW ZSTD_compressBlock_lazy2_extDict_row 163 + #else 164 + #define ZSTD_COMPRESSBLOCK_LAZY2 NULL 165 + #define ZSTD_COMPRESSBLOCK_LAZY2_ROW NULL 166 + #define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE NULL 167 + #define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW NULL 168 + #define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH NULL 169 + #define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW NULL 170 + #define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT NULL 171 + #define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW NULL 172 + #endif 173 + 174 + #ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR 175 + size_t ZSTD_compressBlock_btlazy2( 176 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 177 + void const* src, size_t srcSize); 178 + size_t ZSTD_compressBlock_btlazy2_dictMatchState( 179 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 112 180 void const* src, size_t srcSize); 113 181 size_t ZSTD_compressBlock_btlazy2_extDict( 114 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 182 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 115 183 void const* src, size_t srcSize); 116 - 117 184 185 + #define ZSTD_COMPRESSBLOCK_BTLAZY2 ZSTD_compressBlock_btlazy2 186 + #define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE ZSTD_compressBlock_btlazy2_dictMatchState 187 + #define 
ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT ZSTD_compressBlock_btlazy2_extDict 188 + #else 189 + #define ZSTD_COMPRESSBLOCK_BTLAZY2 NULL 190 + #define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE NULL 191 + #define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT NULL 192 + #endif 118 193 119 194 #endif /* ZSTD_LAZY_H */
+63 -41
lib/zstd/compress/zstd_ldm.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 17 16 #include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */ 18 17 #include "zstd_ldm_geartab.h" 19 18 20 - #define LDM_BUCKET_SIZE_LOG 3 19 + #define LDM_BUCKET_SIZE_LOG 4 21 20 #define LDM_MIN_MATCH_LENGTH 64 22 21 #define LDM_HASH_RLOG 7 23 22 ··· 134 133 } 135 134 136 135 void ZSTD_ldm_adjustParameters(ldmParams_t* params, 137 - ZSTD_compressionParameters const* cParams) 136 + const ZSTD_compressionParameters* cParams) 138 137 { 139 138 params->windowLog = cParams->windowLog; 140 139 ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); 141 140 DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); 142 - if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; 143 - if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; 144 - if (params->hashLog == 0) { 145 - params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG); 146 - assert(params->hashLog <= ZSTD_HASHLOG_MAX); 147 - } 148 141 if (params->hashRateLog == 0) { 149 - params->hashRateLog = params->windowLog < params->hashLog 150 - ? 
0 151 - : params->windowLog - params->hashLog; 142 + if (params->hashLog > 0) { 143 + /* if params->hashLog is set, derive hashRateLog from it */ 144 + assert(params->hashLog <= ZSTD_HASHLOG_MAX); 145 + if (params->windowLog > params->hashLog) { 146 + params->hashRateLog = params->windowLog - params->hashLog; 147 + } 148 + } else { 149 + assert(1 <= (int)cParams->strategy && (int)cParams->strategy <= 9); 150 + /* mapping from [fast, rate7] to [btultra2, rate4] */ 151 + params->hashRateLog = 7 - (cParams->strategy/3); 152 + } 153 + } 154 + if (params->hashLog == 0) { 155 + params->hashLog = BOUNDED(ZSTD_HASHLOG_MIN, params->windowLog - params->hashRateLog, ZSTD_HASHLOG_MAX); 156 + } 157 + if (params->minMatchLength == 0) { 158 + params->minMatchLength = LDM_MIN_MATCH_LENGTH; 159 + if (cParams->strategy >= ZSTD_btultra) 160 + params->minMatchLength /= 2; 161 + } 162 + if (params->bucketSizeLog==0) { 163 + assert(1 <= (int)cParams->strategy && (int)cParams->strategy <= 9); 164 + params->bucketSizeLog = BOUNDED(LDM_BUCKET_SIZE_LOG, (U32)cParams->strategy, ZSTD_LDM_BUCKETSIZELOG_MAX); 152 165 } 153 166 params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog); 154 167 } ··· 185 170 /* ZSTD_ldm_getBucket() : 186 171 * Returns a pointer to the start of the bucket associated with hash. 
*/ 187 172 static ldmEntry_t* ZSTD_ldm_getBucket( 188 - ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams) 173 + const ldmState_t* ldmState, size_t hash, U32 const bucketSizeLog) 189 174 { 190 - return ldmState->hashTable + (hash << ldmParams.bucketSizeLog); 175 + return ldmState->hashTable + (hash << bucketSizeLog); 191 176 } 192 177 193 178 /* ZSTD_ldm_insertEntry() : 194 179 * Insert the entry with corresponding hash into the hash table */ 195 180 static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, 196 181 size_t const hash, const ldmEntry_t entry, 197 - ldmParams_t const ldmParams) 182 + U32 const bucketSizeLog) 198 183 { 199 184 BYTE* const pOffset = ldmState->bucketOffsets + hash; 200 185 unsigned const offset = *pOffset; 201 186 202 - *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry; 203 - *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1)); 187 + *(ZSTD_ldm_getBucket(ldmState, hash, bucketSizeLog) + offset) = entry; 188 + *pOffset = (BYTE)((offset + 1) & ((1u << bucketSizeLog) - 1)); 204 189 205 190 } 206 191 ··· 249 234 * 250 235 * The tables for the other strategies are filled within their 251 236 * block compressors. */ 252 - static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, 237 + static size_t ZSTD_ldm_fillFastTables(ZSTD_MatchState_t* ms, 253 238 void const* end) 254 239 { 255 240 const BYTE* const iend = (const BYTE*)end; ··· 257 242 switch(ms->cParams.strategy) 258 243 { 259 244 case ZSTD_fast: 260 - ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast); 245 + ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx); 261 246 break; 262 247 263 248 case ZSTD_dfast: 264 - ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast); 249 + #ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR 250 + ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx); 251 + #else 252 + assert(0); /* shouldn't be called: cparams should've been adjusted. 
*/ 253 + #endif 265 254 break; 266 255 267 256 case ZSTD_greedy: ··· 288 269 const BYTE* iend, ldmParams_t const* params) 289 270 { 290 271 U32 const minMatchLength = params->minMatchLength; 291 - U32 const hBits = params->hashLog - params->bucketSizeLog; 272 + U32 const bucketSizeLog = params->bucketSizeLog; 273 + U32 const hBits = params->hashLog - bucketSizeLog; 292 274 BYTE const* const base = ldmState->window.base; 293 275 BYTE const* const istart = ip; 294 276 ldmRollingHashState_t hashState; ··· 304 284 unsigned n; 305 285 306 286 numSplits = 0; 307 - hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits); 287 + hashed = ZSTD_ldm_gear_feed(&hashState, ip, (size_t)(iend - ip), splits, &numSplits); 308 288 309 289 for (n = 0; n < numSplits; n++) { 310 290 if (ip + splits[n] >= istart + minMatchLength) { ··· 315 295 316 296 entry.offset = (U32)(split - base); 317 297 entry.checksum = (U32)(xxhash >> 32); 318 - ZSTD_ldm_insertEntry(ldmState, hash, entry, *params); 298 + ZSTD_ldm_insertEntry(ldmState, hash, entry, params->bucketSizeLog); 319 299 } 320 300 } 321 301 ··· 329 309 * Sets cctx->nextToUpdate to a position corresponding closer to anchor 330 310 * if it is far way 331 311 * (after a long match, only update tables a limited amount). 
*/ 332 - static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) 312 + static void ZSTD_ldm_limitTableUpdate(ZSTD_MatchState_t* ms, const BYTE* anchor) 333 313 { 334 314 U32 const curr = (U32)(anchor - ms->window.base); 335 315 if (curr > ms->nextToUpdate + 1024) { ··· 338 318 } 339 319 } 340 320 341 - static size_t ZSTD_ldm_generateSequences_internal( 342 - ldmState_t* ldmState, rawSeqStore_t* rawSeqStore, 321 + static 322 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 323 + size_t ZSTD_ldm_generateSequences_internal( 324 + ldmState_t* ldmState, RawSeqStore_t* rawSeqStore, 343 325 ldmParams_t const* params, void const* src, size_t srcSize) 344 326 { 345 327 /* LDM parameters */ ··· 395 373 candidates[n].split = split; 396 374 candidates[n].hash = hash; 397 375 candidates[n].checksum = (U32)(xxhash >> 32); 398 - candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params); 376 + candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, params->bucketSizeLog); 399 377 PREFETCH_L1(candidates[n].bucket); 400 378 } 401 379 ··· 418 396 * the previous one, we merely register it in the hash table and 419 397 * move on */ 420 398 if (split < anchor) { 421 - ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); 399 + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, params->bucketSizeLog); 422 400 continue; 423 401 } 424 402 ··· 465 443 /* No match found -- insert an entry into the hash table 466 444 * and process the next candidate match */ 467 445 if (bestEntry == NULL) { 468 - ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); 446 + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, params->bucketSizeLog); 469 447 continue; 470 448 } 471 449 ··· 486 464 487 465 /* Insert the current entry into the hash table --- it must be 488 466 * done after the previous block to avoid clobbering bestEntry */ 489 - ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); 467 + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, params->bucketSizeLog); 490 468 491 469 
anchor = split + forwardMatchLength; 492 470 ··· 525 503 } 526 504 527 505 size_t ZSTD_ldm_generateSequences( 528 - ldmState_t* ldmState, rawSeqStore_t* sequences, 506 + ldmState_t* ldmState, RawSeqStore_t* sequences, 529 507 ldmParams_t const* params, void const* src, size_t srcSize) 530 508 { 531 509 U32 const maxDist = 1U << params->windowLog; ··· 571 549 * the window through early invalidation. 572 550 * TODO: * Test the chunk size. 573 551 * * Try invalidation after the sequence generation and test the 574 - * the offset against maxDist directly. 552 + * offset against maxDist directly. 575 553 * 576 554 * NOTE: Because of dictionaries + sequence splitting we MUST make sure 577 555 * that any offset used is valid at the END of the sequence, since it may ··· 602 580 } 603 581 604 582 void 605 - ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) 583 + ZSTD_ldm_skipSequences(RawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) 606 584 { 607 585 while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { 608 586 rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; ··· 638 616 * Returns the current sequence to handle, or if the rest of the block should 639 617 * be literals, it returns a sequence with offset == 0. 
640 618 */ 641 - static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, 619 + static rawSeq maybeSplitSequence(RawSeqStore_t* rawSeqStore, 642 620 U32 const remaining, U32 const minMatch) 643 621 { 644 622 rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos]; ··· 662 640 return sequence; 663 641 } 664 642 665 - void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { 643 + void ZSTD_ldm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes) { 666 644 U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); 667 645 while (currPos && rawSeqStore->pos < rawSeqStore->size) { 668 646 rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; ··· 679 657 } 680 658 } 681 659 682 - size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, 683 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 684 - ZSTD_paramSwitch_e useRowMatchFinder, 660 + size_t ZSTD_ldm_blockCompress(RawSeqStore_t* rawSeqStore, 661 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 662 + ZSTD_ParamSwitch_e useRowMatchFinder, 685 663 void const* src, size_t srcSize) 686 664 { 687 665 const ZSTD_compressionParameters* const cParams = &ms->cParams; 688 666 unsigned const minMatch = cParams->minMatch; 689 - ZSTD_blockCompressor const blockCompressor = 667 + ZSTD_BlockCompressor_f const blockCompressor = 690 668 ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms)); 691 669 /* Input bounds */ 692 670 BYTE const* const istart = (BYTE const*)src; ··· 711 689 /* maybeSplitSequence updates rawSeqStore->pos */ 712 690 rawSeq const sequence = maybeSplitSequence(rawSeqStore, 713 691 (U32)(iend - ip), minMatch); 714 - int i; 715 692 /* End signal */ 716 693 if (sequence.offset == 0) 717 694 break; ··· 723 702 /* Run the block compressor */ 724 703 DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength); 725 704 { 705 + int i; 726 706 size_t 
const newLitLength = 727 707 blockCompressor(ms, seqStore, rep, ip, sequence.litLength); 728 708 ip += sequence.litLength; ··· 733 711 rep[0] = sequence.offset; 734 712 /* Store the sequence */ 735 713 ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, 736 - STORE_OFFSET(sequence.offset), 714 + OFFSET_TO_OFFBASE(sequence.offset), 737 715 sequence.matchLength); 738 716 ip += sequence.matchLength; 739 717 }
+8 -9
lib/zstd/compress/zstd_ldm.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 11 10 12 11 #ifndef ZSTD_LDM_H 13 12 #define ZSTD_LDM_H 14 - 15 13 16 14 #include "zstd_compress_internal.h" /* ldmParams_t, U32 */ 17 15 #include <linux/zstd.h> /* ZSTD_CCtx, size_t */ ··· 40 40 * sequences. 41 41 */ 42 42 size_t ZSTD_ldm_generateSequences( 43 - ldmState_t* ldms, rawSeqStore_t* sequences, 43 + ldmState_t* ldms, RawSeqStore_t* sequences, 44 44 ldmParams_t const* params, void const* src, size_t srcSize); 45 45 46 46 /* ··· 61 61 * two. We handle that case correctly, and update `rawSeqStore` appropriately. 62 62 * NOTE: This function does not return any errors. 63 63 */ 64 - size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, 65 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 66 - ZSTD_paramSwitch_e useRowMatchFinder, 64 + size_t ZSTD_ldm_blockCompress(RawSeqStore_t* rawSeqStore, 65 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 66 + ZSTD_ParamSwitch_e useRowMatchFinder, 67 67 void const* src, size_t srcSize); 68 68 69 69 /* ··· 73 73 * Avoids emitting matches less than `minMatch` bytes. 74 74 * Must be called for data that is not passed to ZSTD_ldm_blockCompress(). 75 75 */ 76 - void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, 76 + void ZSTD_ldm_skipSequences(RawSeqStore_t* rawSeqStore, size_t srcSize, 77 77 U32 const minMatch); 78 78 79 79 /* ZSTD_ldm_skipRawSeqStoreBytes(): ··· 81 81 * Not to be used in conjunction with ZSTD_ldm_skipSequences(). 82 82 * Must be called for data with is not passed to ZSTD_ldm_blockCompress(). 
83 83 */ 84 - void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes); 84 + void ZSTD_ldm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes); 85 85 86 86 /* ZSTD_ldm_getTableSize() : 87 87 * Estimate the space needed for long distance matching tables or 0 if LDM is ··· 106 106 */ 107 107 void ZSTD_ldm_adjustParameters(ldmParams_t* params, 108 108 ZSTD_compressionParameters const* cParams); 109 - 110 109 111 110 #endif /* ZSTD_FAST_H */
+2 -1
lib/zstd/compress/zstd_ldm_geartab.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the
+354 -219
lib/zstd/compress/zstd_opt.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 1 2 /* 2 - * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 13 12 #include "hist.h" 14 13 #include "zstd_opt.h" 15 14 15 + #if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \ 16 + || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \ 17 + || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) 16 18 17 19 #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ 18 20 #define ZSTD_MAX_PRICE (1<<30) 19 21 20 - #define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ 22 + #define ZSTD_PREDEF_THRESHOLD 8 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ 21 23 22 24 23 25 /*-************************************* ··· 30 26 #if 0 /* approximation at bit level (for tests) */ 31 27 # define BITCOST_ACCURACY 0 32 28 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) 33 - # define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat)) 29 + # define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat)) 34 30 #elif 0 /* fractional bit accuracy (for tests) */ 35 31 # define BITCOST_ACCURACY 8 36 32 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) 37 - # define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat)) 33 + # define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat)) 38 34 #else /* opt==approx, ultra==accurate */ 39 35 # define BITCOST_ACCURACY 8 40 36 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) 41 - # define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat)) 37 + # define WEIGHT(stat,opt) ((opt) ? 
ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat)) 42 38 #endif 43 39 40 + /* ZSTD_bitWeight() : 41 + * provide estimated "cost" of a stat in full bits only */ 44 42 MEM_STATIC U32 ZSTD_bitWeight(U32 stat) 45 43 { 46 44 return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER); 47 45 } 48 46 47 + /* ZSTD_fracWeight() : 48 + * provide fractional-bit "cost" of a stat, 49 + * using linear interpolation approximation */ 49 50 MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat) 50 51 { 51 52 U32 const stat = rawStat + 1; 52 53 U32 const hb = ZSTD_highbit32(stat); 53 54 U32 const BWeight = hb * BITCOST_MULTIPLIER; 55 + /* Fweight was meant for "Fractional weight" 56 + * but it's effectively a value between 1 and 2 57 + * using fixed point arithmetic */ 54 58 U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb; 55 59 U32 const weight = BWeight + FWeight; 56 60 assert(hb + BITCOST_ACCURACY < 31); ··· 69 57 /* debugging function, 70 58 * @return price in bytes as fractional value 71 59 * for debug messages only */ 72 - MEM_STATIC double ZSTD_fCost(U32 price) 60 + MEM_STATIC double ZSTD_fCost(int price) 73 61 { 74 62 return (double)price / (BITCOST_MULTIPLIER*8); 75 63 } ··· 100 88 return total; 101 89 } 102 90 103 - static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift) 91 + typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e; 92 + 93 + static U32 94 + ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1) 104 95 { 105 96 U32 s, sum=0; 106 - DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift); 97 + DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", 98 + (unsigned)lastEltIndex+1, (unsigned)shift ); 107 99 assert(shift < 30); 108 100 for (s=0; s<lastEltIndex+1; s++) { 109 - table[s] = 1 + (table[s] >> shift); 110 - sum += table[s]; 101 + unsigned const base = base1 ? 
1 : (table[s]>0); 102 + unsigned const newStat = base + (table[s] >> shift); 103 + sum += newStat; 104 + table[s] = newStat; 111 105 } 112 106 return sum; 113 107 } 114 108 115 109 /* ZSTD_scaleStats() : 116 - * reduce all elements in table is sum too large 110 + * reduce all elt frequencies in table if sum too large 117 111 * return the resulting sum of elements */ 118 112 static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget) 119 113 { ··· 128 110 DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget); 129 111 assert(logTarget < 30); 130 112 if (factor <= 1) return prevsum; 131 - return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor)); 113 + return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed); 132 114 } 133 115 134 116 /* ZSTD_rescaleFreqs() : ··· 147 129 DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); 148 130 optPtr->priceType = zop_dynamic; 149 131 150 - if (optPtr->litLengthSum == 0) { /* first block : init */ 151 - if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */ 152 - DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef"); 132 + if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */ 133 + 134 + /* heuristic: use pre-defined stats for too small inputs */ 135 + if (srcSize <= ZSTD_PREDEF_THRESHOLD) { 136 + DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD); 153 137 optPtr->priceType = zop_predef; 154 138 } 155 139 156 140 assert(optPtr->symbolCosts != NULL); 157 141 if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { 158 - /* huffman table presumed generated by dictionary */ 142 + 143 + /* huffman stats covering the full value set : table presumed generated by dictionary */ 159 144 optPtr->priceType = zop_dynamic; 160 145 161 146 if (compressedLiterals) { 147 + /* generate literals statistics from huffman table */ 162 148 unsigned 
lit; 163 149 assert(optPtr->litFreq != NULL); 164 150 optPtr->litSum = 0; ··· 210 188 optPtr->offCodeSum += optPtr->offCodeFreq[of]; 211 189 } } 212 190 213 - } else { /* not a dictionary */ 191 + } else { /* first block, no dictionary */ 214 192 215 193 assert(optPtr->litFreq != NULL); 216 194 if (compressedLiterals) { 195 + /* base initial cost of literals on direct frequency within src */ 217 196 unsigned lit = MaxLit; 218 197 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ 219 - optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8); 198 + optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible); 220 199 } 221 200 222 201 { unsigned const baseLLfreqs[MaxLL+1] = { ··· 247 224 optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1); 248 225 } 249 226 250 - 251 227 } 252 228 253 - } else { /* new block : re-use previous statistics, scaled down */ 229 + } else { /* new block : scale down accumulated statistics */ 254 230 255 231 if (compressedLiterals) 256 232 optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12); ··· 268 246 const optState_t* const optPtr, 269 247 int optLevel) 270 248 { 249 + DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength); 271 250 if (litLength == 0) return 0; 272 251 273 252 if (!ZSTD_compressedLiterals(optPtr)) ··· 278 255 return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ 279 256 280 257 /* dynamic statistics */ 281 - { U32 price = litLength * optPtr->litSumBasePrice; 258 + { U32 price = optPtr->litSumBasePrice * litLength; 259 + U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER; 282 260 U32 u; 261 + assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER); 283 262 for (u=0; u < litLength; u++) { 284 - assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */ 285 - price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel); 
263 + U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel); 264 + if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax; 265 + price -= litPrice; 286 266 } 287 267 return price; 288 268 } ··· 298 272 assert(litLength <= ZSTD_BLOCKSIZE_MAX); 299 273 if (optPtr->priceType == zop_predef) 300 274 return WEIGHT(litLength, optLevel); 301 - /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX 302 - * because it isn't representable in the zstd format. So instead just 303 - * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block 304 - * would be all literals. 275 + 276 + /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX 277 + * because it isn't representable in the zstd format. 278 + * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. 279 + * In such a case, the block would be all literals. 305 280 */ 306 281 if (litLength == ZSTD_BLOCKSIZE_MAX) 307 282 return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel); ··· 316 289 } 317 290 318 291 /* ZSTD_getMatchPrice() : 319 - * Provides the cost of the match part (offset + matchLength) of a sequence 292 + * Provides the cost of the match part (offset + matchLength) of a sequence. 320 293 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. 
321 - * @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2 294 + * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq() 322 295 * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) 323 296 */ 324 297 FORCE_INLINE_TEMPLATE U32 325 - ZSTD_getMatchPrice(U32 const offcode, 298 + ZSTD_getMatchPrice(U32 const offBase, 326 299 U32 const matchLength, 327 300 const optState_t* const optPtr, 328 301 int const optLevel) 329 302 { 330 303 U32 price; 331 - U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode)); 304 + U32 const offCode = ZSTD_highbit32(offBase); 332 305 U32 const mlBase = matchLength - MINMATCH; 333 306 assert(matchLength >= MINMATCH); 334 307 335 - if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ 336 - return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER); 308 + if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */ 309 + return WEIGHT(mlBase, optLevel) 310 + + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */ 337 311 338 312 /* dynamic statistics */ 339 313 price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel)); ··· 353 325 } 354 326 355 327 /* ZSTD_updateStats() : 356 - * assumption : literals + litLengtn <= iend */ 328 + * assumption : literals + litLength <= iend */ 357 329 static void ZSTD_updateStats(optState_t* const optPtr, 358 330 U32 litLength, const BYTE* literals, 359 - U32 offsetCode, U32 matchLength) 331 + U32 offBase, U32 matchLength) 360 332 { 361 333 /* literals */ 362 334 if (ZSTD_compressedLiterals(optPtr)) { ··· 372 344 optPtr->litLengthSum++; 373 345 } 374 346 375 - /* offset code : expected to follow storeSeq() numeric representation */ 376 - { U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode)); 347 + /* offset code : follows storeSeq() 
numeric representation */ 348 + { U32 const offCode = ZSTD_highbit32(offBase); 377 349 assert(offCode <= MaxOff); 378 350 optPtr->offCodeFreq[offCode]++; 379 351 optPtr->offCodeSum++; ··· 407 379 408 380 /* Update hashTable3 up to ip (excluded) 409 381 Assumption : always within prefix (i.e. not within extDict) */ 410 - static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms, 411 - U32* nextToUpdate3, 412 - const BYTE* const ip) 382 + static 383 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 384 + U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_MatchState_t* ms, 385 + U32* nextToUpdate3, 386 + const BYTE* const ip) 413 387 { 414 388 U32* const hashTable3 = ms->hashTable3; 415 389 U32 const hashLog3 = ms->hashLog3; ··· 438 408 * @param ip assumed <= iend-8 . 439 409 * @param target The target of ZSTD_updateTree_internal() - we are filling to this position 440 410 * @return : nb of positions added */ 441 - static U32 ZSTD_insertBt1( 442 - const ZSTD_matchState_t* ms, 411 + static 412 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 413 + U32 ZSTD_insertBt1( 414 + const ZSTD_MatchState_t* ms, 443 415 const BYTE* const ip, const BYTE* const iend, 444 416 U32 const target, 445 417 U32 const mls, const int extDict) ··· 559 527 } 560 528 561 529 FORCE_INLINE_TEMPLATE 530 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 562 531 void ZSTD_updateTree_internal( 563 - ZSTD_matchState_t* ms, 532 + ZSTD_MatchState_t* ms, 564 533 const BYTE* const ip, const BYTE* const iend, 565 534 const U32 mls, const ZSTD_dictMode_e dictMode) 566 535 { 567 536 const BYTE* const base = ms->window.base; 568 537 U32 const target = (U32)(ip - base); 569 538 U32 idx = ms->nextToUpdate; 570 - DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", 539 + DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", 571 540 idx, target, dictMode); 572 541 573 542 while(idx < target) { ··· 581 548 ms->nextToUpdate = target; 582 549 } 583 550 584 - void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* 
ip, const BYTE* iend) { 551 + void ZSTD_updateTree(ZSTD_MatchState_t* ms, const BYTE* ip, const BYTE* iend) { 585 552 ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict); 586 553 } 587 554 588 555 FORCE_INLINE_TEMPLATE 589 - U32 ZSTD_insertBtAndGetAllMatches ( 590 - ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ 591 - ZSTD_matchState_t* ms, 592 - U32* nextToUpdate3, 593 - const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, 594 - const U32 rep[ZSTD_REP_NUM], 595 - U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ 596 - const U32 lengthToBeat, 597 - U32 const mls /* template */) 556 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 557 + U32 558 + ZSTD_insertBtAndGetAllMatches ( 559 + ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ 560 + ZSTD_MatchState_t* ms, 561 + U32* nextToUpdate3, 562 + const BYTE* const ip, const BYTE* const iLimit, 563 + const ZSTD_dictMode_e dictMode, 564 + const U32 rep[ZSTD_REP_NUM], 565 + const U32 ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ 566 + const U32 lengthToBeat, 567 + const U32 mls /* template */) 598 568 { 599 569 const ZSTD_compressionParameters* const cParams = &ms->cParams; 600 570 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); ··· 626 590 U32 mnum = 0; 627 591 U32 nbCompares = 1U << cParams->searchLog; 628 592 629 - const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL; 593 + const ZSTD_MatchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL; 630 594 const ZSTD_compressionParameters* const dmsCParams = 631 595 dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL; 632 596 const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? 
dms->window.base : NULL; ··· 665 629 assert(curr >= windowLow); 666 630 if ( dictMode == ZSTD_extDict 667 631 && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */ 668 - & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */) 632 + & (ZSTD_index_overlap_check(dictLimit, repIndex)) ) 669 633 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { 670 634 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; 671 635 } 672 636 if (dictMode == ZSTD_dictMatchState 673 637 && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */ 674 - & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */ 638 + & (ZSTD_index_overlap_check(dictLimit, repIndex)) ) 675 639 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { 676 640 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch; 677 641 } } ··· 680 644 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", 681 645 repCode, ll0, repOffset, repLen); 682 646 bestLength = repLen; 683 - matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1); /* expect value between 1 and 3 */ 647 + matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1); /* expect value between 1 and 3 */ 684 648 matches[mnum].len = (U32)repLen; 685 649 mnum++; 686 650 if ( (repLen > sufficient_len) ··· 709 673 bestLength = mlen; 710 674 assert(curr > matchIndex3); 711 675 assert(mnum==0); /* no prior solution */ 712 - matches[0].off = STORE_OFFSET(curr - matchIndex3); 676 + matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3); 713 677 matches[0].len = (U32)mlen; 714 678 mnum = 1; 715 679 if ( (mlen > sufficient_len) | ··· 
742 706 } 743 707 744 708 if (matchLength > bestLength) { 745 - DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)", 746 - (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex)); 709 + DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)", 710 + (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex)); 747 711 assert(matchEndIdx > matchIndex); 748 712 if (matchLength > matchEndIdx - matchIndex) 749 713 matchEndIdx = matchIndex + (U32)matchLength; 750 714 bestLength = matchLength; 751 - matches[mnum].off = STORE_OFFSET(curr - matchIndex); 715 + matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex); 752 716 matches[mnum].len = (U32)matchLength; 753 717 mnum++; 754 718 if ( (matchLength > ZSTD_OPT_NUM) ··· 790 754 791 755 if (matchLength > bestLength) { 792 756 matchIndex = dictMatchIndex + dmsIndexDelta; 793 - DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)", 794 - (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex)); 757 + DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)", 758 + (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex)); 795 759 if (matchLength > matchEndIdx - matchIndex) 796 760 matchEndIdx = matchIndex + (U32)matchLength; 797 761 bestLength = matchLength; 798 - matches[mnum].off = STORE_OFFSET(curr - matchIndex); 762 + matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex); 799 763 matches[mnum].len = (U32)matchLength; 800 764 mnum++; 801 765 if ( (matchLength > ZSTD_OPT_NUM) ··· 820 784 821 785 typedef U32 (*ZSTD_getAllMatchesFn)( 822 786 ZSTD_match_t*, 823 - ZSTD_matchState_t*, 787 + ZSTD_MatchState_t*, 824 788 U32*, 825 789 const BYTE*, 826 790 const BYTE*, ··· 828 792 U32 const ll0, 829 793 U32 const lengthToBeat); 830 794 831 - FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal( 795 + FORCE_INLINE_TEMPLATE 796 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 797 + U32 ZSTD_btGetAllMatches_internal( 832 798 
ZSTD_match_t* matches, 833 - ZSTD_matchState_t* ms, 799 + ZSTD_MatchState_t* ms, 834 800 U32* nextToUpdate3, 835 801 const BYTE* ip, 836 802 const BYTE* const iHighLimit, ··· 855 817 #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \ 856 818 static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \ 857 819 ZSTD_match_t* matches, \ 858 - ZSTD_matchState_t* ms, \ 820 + ZSTD_MatchState_t* ms, \ 859 821 U32* nextToUpdate3, \ 860 822 const BYTE* ip, \ 861 823 const BYTE* const iHighLimit, \ ··· 887 849 } 888 850 889 851 static ZSTD_getAllMatchesFn 890 - ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode) 852 + ZSTD_selectBtGetAllMatches(ZSTD_MatchState_t const* ms, ZSTD_dictMode_e const dictMode) 891 853 { 892 854 ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = { 893 855 ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict), ··· 906 868 907 869 /* Struct containing info needed to make decision about ldm inclusion */ 908 870 typedef struct { 909 - rawSeqStore_t seqStore; /* External match candidates store for this block */ 871 + RawSeqStore_t seqStore; /* External match candidates store for this block */ 910 872 U32 startPosInBlock; /* Start position of the current match candidate */ 911 873 U32 endPosInBlock; /* End position of the current match candidate */ 912 874 U32 offset; /* Offset of the match candidate */ ··· 916 878 * Moves forward in @rawSeqStore by @nbBytes, 917 879 * which will update the fields 'pos' and 'posInSequence'. 918 880 */ 919 - static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) 881 + static void ZSTD_optLdm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes) 920 882 { 921 883 U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); 922 884 while (currPos && rawSeqStore->pos < rawSeqStore->size) { ··· 973 935 return; 974 936 } 975 937 976 - /* Matches may be < MINMATCH by this process. In that case, we will reject them 938 + /* Matches may be < minMatch by this process. 
In that case, we will reject them 977 939 when we are deciding whether or not to add the ldm */ 978 940 optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining; 979 941 optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining; ··· 995 957 * into 'matches'. Maintains the correct ordering of 'matches'. 996 958 */ 997 959 static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, 998 - const ZSTD_optLdm_t* optLdm, U32 currPosInBlock) 960 + const ZSTD_optLdm_t* optLdm, U32 currPosInBlock, 961 + U32 minMatch) 999 962 { 1000 963 U32 const posDiff = currPosInBlock - optLdm->startPosInBlock; 1001 - /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */ 964 + /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */ 1002 965 U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff; 1003 966 1004 967 /* Ensure that current block position is not outside of the match */ 1005 968 if (currPosInBlock < optLdm->startPosInBlock 1006 969 || currPosInBlock >= optLdm->endPosInBlock 1007 - || candidateMatchLength < MINMATCH) { 970 + || candidateMatchLength < minMatch) { 1008 971 return; 1009 972 } 1010 973 1011 974 if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) { 1012 - U32 const candidateOffCode = STORE_OFFSET(optLdm->offset); 1013 - DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u", 1014 - candidateOffCode, candidateMatchLength, currPosInBlock); 975 + U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset); 976 + DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u", 977 + candidateOffBase, candidateMatchLength, currPosInBlock); 1015 978 matches[*nbMatches].len = candidateMatchLength; 1016 - matches[*nbMatches].off = 
candidateOffCode; 979 + matches[*nbMatches].off = candidateOffBase; 1017 980 (*nbMatches)++; 1018 981 } 1019 982 } ··· 1025 986 static void 1026 987 ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, 1027 988 ZSTD_match_t* matches, U32* nbMatches, 1028 - U32 currPosInBlock, U32 remainingBytes) 989 + U32 currPosInBlock, U32 remainingBytes, 990 + U32 minMatch) 1029 991 { 1030 992 if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { 1031 993 return; ··· 1043 1003 } 1044 1004 ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes); 1045 1005 } 1046 - ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock); 1006 + ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock, minMatch); 1047 1007 } 1048 1008 1049 1009 1050 1010 /*-******************************* 1051 1011 * Optimal parser 1052 1012 *********************************/ 1053 - 1054 - static U32 ZSTD_totalLen(ZSTD_optimal_t sol) 1055 - { 1056 - return sol.litlen + sol.mlen; 1057 - } 1058 1013 1059 1014 #if 0 /* debug */ 1060 1015 ··· 1068 1033 1069 1034 #endif 1070 1035 1071 - FORCE_INLINE_TEMPLATE size_t 1072 - ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, 1073 - seqStore_t* seqStore, 1036 + #define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel) 1037 + #define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel) 1038 + #define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1)) 1039 + 1040 + FORCE_INLINE_TEMPLATE 1041 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 1042 + size_t 1043 + ZSTD_compressBlock_opt_generic(ZSTD_MatchState_t* ms, 1044 + SeqStore_t* seqStore, 1074 1045 U32 rep[ZSTD_REP_NUM], 1075 1046 const void* src, size_t srcSize, 1076 1047 const int optLevel, ··· 1100 1059 1101 1060 ZSTD_optimal_t* const opt = optStatePtr->priceTable; 1102 1061 ZSTD_match_t* const matches = optStatePtr->matchTable; 1103 - ZSTD_optimal_t lastSequence; 1062 + ZSTD_optimal_t lastStretch; 1104 1063 ZSTD_optLdm_t 
optLdm; 1064 + 1065 + ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t)); 1105 1066 1106 1067 optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore; 1107 1068 optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0; ··· 1125 1082 U32 const ll0 = !litlen; 1126 1083 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch); 1127 1084 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, 1128 - (U32)(ip-istart), (U32)(iend - ip)); 1129 - if (!nbMatches) { ip++; continue; } 1085 + (U32)(ip-istart), (U32)(iend-ip), 1086 + minMatch); 1087 + if (!nbMatches) { 1088 + DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart)); 1089 + ip++; 1090 + continue; 1091 + } 1092 + 1093 + /* Match found: let's store this solution, and eventually find more candidates. 1094 + * During this forward pass, @opt is used to store stretches, 1095 + * defined as "a match followed by N literals". 1096 + * Note how this is different from a Sequence, which is "N literals followed by a match". 1097 + * Storing stretches allows us to store different match predecessors 1098 + * for each literal position part of a literals run. */ 1130 1099 1131 1100 /* initialize opt[0] */ 1132 - { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; } 1133 - opt[0].mlen = 0; /* means is_a_literal */ 1101 + opt[0].mlen = 0; /* there are only literals so far */ 1134 1102 opt[0].litlen = litlen; 1135 - /* We don't need to include the actual price of the literals because 1136 - * it is static for the duration of the forward pass, and is included 1137 - * in every price. We include the literal length to avoid negative 1138 - * prices when we subtract the previous literal length. 1103 + /* No need to include the actual price of the literals before the first match 1104 + * because it is static for the duration of the forward pass, and is included 1105 + * in every subsequent price. 
But, we include the literal length because 1106 + * the cost variation of litlen depends on the value of litlen. 1139 1107 */ 1140 - opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel); 1108 + opt[0].price = LL_PRICE(litlen); 1109 + ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0])); 1110 + ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep)); 1141 1111 1142 1112 /* large match -> immediate encoding */ 1143 1113 { U32 const maxML = matches[nbMatches-1].len; 1144 - U32 const maxOffcode = matches[nbMatches-1].off; 1145 - DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series", 1146 - nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart)); 1114 + U32 const maxOffBase = matches[nbMatches-1].off; 1115 + DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series", 1116 + nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart)); 1147 1117 1148 1118 if (maxML > sufficient_len) { 1149 - lastSequence.litlen = litlen; 1150 - lastSequence.mlen = maxML; 1151 - lastSequence.off = maxOffcode; 1152 - DEBUGLOG(6, "large match (%u>%u), immediate encoding", 1119 + lastStretch.litlen = 0; 1120 + lastStretch.mlen = maxML; 1121 + lastStretch.off = maxOffBase; 1122 + DEBUGLOG(6, "large match (%u>%u) => immediate encoding", 1153 1123 maxML, sufficient_len); 1154 1124 cur = 0; 1155 - last_pos = ZSTD_totalLen(lastSequence); 1125 + last_pos = maxML; 1156 1126 goto _shortestPath; 1157 1127 } } 1158 1128 1159 1129 /* set prices for first matches starting position == 0 */ 1160 1130 assert(opt[0].price >= 0); 1161 - { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel); 1162 - U32 pos; 1131 + { U32 pos; 1163 1132 U32 matchNb; 1164 1133 for (pos = 1; pos < minMatch; pos++) { 1165 - opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */ 1134 + opt[pos].price = ZSTD_MAX_PRICE; 1135 + opt[pos].mlen = 0; 1136 + 
opt[pos].litlen = litlen + pos; 1166 1137 } 1167 1138 for (matchNb = 0; matchNb < nbMatches; matchNb++) { 1168 - U32 const offcode = matches[matchNb].off; 1139 + U32 const offBase = matches[matchNb].off; 1169 1140 U32 const end = matches[matchNb].len; 1170 1141 for ( ; pos <= end ; pos++ ) { 1171 - U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel); 1172 - U32 const sequencePrice = literalsPrice + matchPrice; 1142 + int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel); 1143 + int const sequencePrice = opt[0].price + matchPrice; 1173 1144 DEBUGLOG(7, "rPos:%u => set initial price : %.2f", 1174 1145 pos, ZSTD_fCost(sequencePrice)); 1175 1146 opt[pos].mlen = pos; 1176 - opt[pos].off = offcode; 1177 - opt[pos].litlen = litlen; 1178 - opt[pos].price = (int)sequencePrice; 1179 - } } 1147 + opt[pos].off = offBase; 1148 + opt[pos].litlen = 0; /* end of match */ 1149 + opt[pos].price = sequencePrice + LL_PRICE(0); 1150 + } 1151 + } 1180 1152 last_pos = pos-1; 1153 + opt[pos].price = ZSTD_MAX_PRICE; 1181 1154 } 1182 1155 } 1183 1156 1184 1157 /* check further positions */ 1185 1158 for (cur = 1; cur <= last_pos; cur++) { 1186 1159 const BYTE* const inr = ip + cur; 1187 - assert(cur < ZSTD_OPT_NUM); 1188 - DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur) 1160 + assert(cur <= ZSTD_OPT_NUM); 1161 + DEBUGLOG(7, "cPos:%i==rPos:%u", (int)(inr-istart), cur); 1189 1162 1190 1163 /* Fix current position with one literal if cheaper */ 1191 - { U32 const litlen = (opt[cur-1].mlen == 0) ? 
opt[cur-1].litlen + 1 : 1; 1164 + { U32 const litlen = opt[cur-1].litlen + 1; 1192 1165 int const price = opt[cur-1].price 1193 - + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) 1194 - + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) 1195 - - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); 1166 + + LIT_PRICE(ip+cur-1) 1167 + + LL_INCPRICE(litlen); 1196 1168 assert(price < 1000000000); /* overflow check */ 1197 1169 if (price <= opt[cur].price) { 1198 - DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", 1199 - inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, 1170 + ZSTD_optimal_t const prevMatch = opt[cur]; 1171 + DEBUGLOG(7, "cPos:%i==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", 1172 + (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, 1200 1173 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); 1201 - opt[cur].mlen = 0; 1202 - opt[cur].off = 0; 1174 + opt[cur] = opt[cur-1]; 1203 1175 opt[cur].litlen = litlen; 1204 1176 opt[cur].price = price; 1177 + if ( (optLevel >= 1) /* additional check only for higher modes */ 1178 + && (prevMatch.litlen == 0) /* replace a match */ 1179 + && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */ 1180 + && LIKELY(ip + cur < iend) 1181 + ) { 1182 + /* check next position, in case it would be cheaper */ 1183 + int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1); 1184 + int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1); 1185 + DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f", 1186 + cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals)); 1187 + if ( (with1literal < withMoreLiterals) 1188 + && (with1literal < opt[cur+1].price) ) { 1189 + /* update offset history - before it disappears */ 1190 + U32 const prev = cur - prevMatch.mlen; 1191 + Repcodes_t const newReps = 
ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0); 1192 + assert(cur >= prevMatch.mlen); 1193 + DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !", 1194 + ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals), 1195 + newReps.rep[0], newReps.rep[1], newReps.rep[2] ); 1196 + opt[cur+1] = prevMatch; /* mlen & offbase */ 1197 + ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(Repcodes_t)); 1198 + opt[cur+1].litlen = 1; 1199 + opt[cur+1].price = with1literal; 1200 + if (last_pos < cur+1) last_pos = cur+1; 1201 + } 1202 + } 1205 1203 } else { 1206 - DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)", 1207 - inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), 1208 - opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]); 1204 + DEBUGLOG(7, "cPos:%i==rPos:%u : literal would cost more (%.2f>%.2f)", 1205 + (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price)); 1209 1206 } 1210 1207 } 1211 1208 1212 - /* Set the repcodes of the current position. We must do it here 1213 - * because we rely on the repcodes of the 2nd to last sequence being 1214 - * correct to set the next chunks repcodes during the backward 1215 - * traversal. 1209 + /* Offset history is not updated during match comparison. 1210 + * Do it here, now that the match is selected and confirmed. 
1216 1211 */ 1217 - ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t)); 1212 + ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(Repcodes_t)); 1218 1213 assert(cur >= opt[cur].mlen); 1219 - if (opt[cur].mlen != 0) { 1214 + if (opt[cur].litlen == 0) { 1215 + /* just finished a match => alter offset history */ 1220 1216 U32 const prev = cur - opt[cur].mlen; 1221 - repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0); 1222 - ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); 1223 - } else { 1224 - ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); 1217 + Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0); 1218 + ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(Repcodes_t)); 1225 1219 } 1226 1220 1227 1221 /* last match must start at a minimum distance of 8 from oend */ ··· 1268 1188 1269 1189 if ( (optLevel==0) /*static_test*/ 1270 1190 && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) { 1271 - DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1); 1191 + DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1); 1272 1192 continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ 1273 1193 } 1274 1194 1275 1195 assert(opt[cur].price >= 0); 1276 - { U32 const ll0 = (opt[cur].mlen != 0); 1277 - U32 const litlen = (opt[cur].mlen == 0) ? 
opt[cur].litlen : 0; 1278 - U32 const previousPrice = (U32)opt[cur].price; 1279 - U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); 1196 + { U32 const ll0 = (opt[cur].litlen == 0); 1197 + int const previousPrice = opt[cur].price; 1198 + int const basePrice = previousPrice + LL_PRICE(0); 1280 1199 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch); 1281 1200 U32 matchNb; 1282 1201 1283 1202 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, 1284 - (U32)(inr-istart), (U32)(iend-inr)); 1203 + (U32)(inr-istart), (U32)(iend-inr), 1204 + minMatch); 1285 1205 1286 1206 if (!nbMatches) { 1287 1207 DEBUGLOG(7, "rPos:%u : no match found", cur); 1288 1208 continue; 1289 1209 } 1290 1210 1291 - { U32 const maxML = matches[nbMatches-1].len; 1292 - DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u", 1293 - inr-istart, cur, nbMatches, maxML); 1211 + { U32 const longestML = matches[nbMatches-1].len; 1212 + DEBUGLOG(7, "cPos:%i==rPos:%u, found %u matches, of longest ML=%u", 1213 + (int)(inr-istart), cur, nbMatches, longestML); 1294 1214 1295 - if ( (maxML > sufficient_len) 1296 - || (cur + maxML >= ZSTD_OPT_NUM) ) { 1297 - lastSequence.mlen = maxML; 1298 - lastSequence.off = matches[nbMatches-1].off; 1299 - lastSequence.litlen = litlen; 1300 - cur -= (opt[cur].mlen==0) ? 
opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */ 1301 - last_pos = cur + ZSTD_totalLen(lastSequence); 1302 - if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */ 1215 + if ( (longestML > sufficient_len) 1216 + || (cur + longestML >= ZSTD_OPT_NUM) 1217 + || (ip + cur + longestML >= iend) ) { 1218 + lastStretch.mlen = longestML; 1219 + lastStretch.off = matches[nbMatches-1].off; 1220 + lastStretch.litlen = 0; 1221 + last_pos = cur + longestML; 1303 1222 goto _shortestPath; 1304 1223 } } 1305 1224 ··· 1309 1230 U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch; 1310 1231 U32 mlen; 1311 1232 1312 - DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u", 1313 - matchNb, matches[matchNb].off, lastML, litlen); 1233 + DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u", 1234 + matchNb, matches[matchNb].off, lastML, opt[cur].litlen); 1314 1235 1315 1236 for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ 1316 1237 U32 const pos = cur + mlen; 1317 - int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); 1238 + int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); 1318 1239 1319 1240 if ((pos > last_pos) || (price < opt[pos].price)) { 1320 1241 DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", 1321 1242 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); 1322 - while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */ 1243 + while (last_pos < pos) { 1244 + /* fill empty positions, for future comparisons */ 1245 + last_pos++; 1246 + opt[last_pos].price = ZSTD_MAX_PRICE; 1247 + opt[last_pos].litlen = !0; /* just needs to be != 0, to mean "not an end of match" */ 1248 + } 1323 1249 opt[pos].mlen = mlen; 1324 1250 opt[pos].off = offset; 1325 - 
opt[pos].litlen = litlen; 1251 + opt[pos].litlen = 0; 1326 1252 opt[pos].price = price; 1327 1253 } else { 1328 1254 DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", ··· 1335 1251 if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */ 1336 1252 } 1337 1253 } } } 1254 + opt[last_pos+1].price = ZSTD_MAX_PRICE; 1338 1255 } /* for (cur = 1; cur <= last_pos; cur++) */ 1339 1256 1340 - lastSequence = opt[last_pos]; 1341 - cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */ 1342 - assert(cur < ZSTD_OPT_NUM); /* control overflow*/ 1257 + lastStretch = opt[last_pos]; 1258 + assert(cur >= lastStretch.mlen); 1259 + cur = last_pos - lastStretch.mlen; 1343 1260 1344 1261 _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ 1345 1262 assert(opt[0].mlen == 0); 1263 + assert(last_pos >= lastStretch.mlen); 1264 + assert(cur == last_pos - lastStretch.mlen); 1346 1265 1347 - /* Set the next chunk's repcodes based on the repcodes of the beginning 1348 - * of the last match, and the last sequence. This avoids us having to 1349 - * update them while traversing the sequences. 
1350 - */ 1351 - if (lastSequence.mlen != 0) { 1352 - repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0); 1353 - ZSTD_memcpy(rep, &reps, sizeof(reps)); 1266 + if (lastStretch.mlen==0) { 1267 + /* no solution : all matches have been converted into literals */ 1268 + assert(lastStretch.litlen == (ip - anchor) + last_pos); 1269 + ip += last_pos; 1270 + continue; 1271 + } 1272 + assert(lastStretch.off > 0); 1273 + 1274 + /* Update offset history */ 1275 + if (lastStretch.litlen == 0) { 1276 + /* finishing on a match : update offset history */ 1277 + Repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0); 1278 + ZSTD_memcpy(rep, &reps, sizeof(Repcodes_t)); 1354 1279 } else { 1355 - ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t)); 1280 + ZSTD_memcpy(rep, lastStretch.rep, sizeof(Repcodes_t)); 1281 + assert(cur >= lastStretch.litlen); 1282 + cur -= lastStretch.litlen; 1356 1283 } 1357 1284 1358 - { U32 const storeEnd = cur + 1; 1285 + /* Let's write the shortest path solution. 1286 + * It is stored in @opt in reverse order, 1287 + * starting from @storeEnd (==cur+2), 1288 + * effectively partially @opt overwriting. 
1289 + * Content is changed too: 1290 + * - So far, @opt stored stretches, aka a match followed by literals 1291 + * - Now, it will store sequences, aka literals followed by a match 1292 + */ 1293 + { U32 const storeEnd = cur + 2; 1359 1294 U32 storeStart = storeEnd; 1360 - U32 seqPos = cur; 1295 + U32 stretchPos = cur; 1361 1296 1362 1297 DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", 1363 1298 last_pos, cur); (void)last_pos; 1364 - assert(storeEnd < ZSTD_OPT_NUM); 1365 - DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", 1366 - storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off); 1367 - opt[storeEnd] = lastSequence; 1368 - while (seqPos > 0) { 1369 - U32 const backDist = ZSTD_totalLen(opt[seqPos]); 1299 + assert(storeEnd < ZSTD_OPT_SIZE); 1300 + DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", 1301 + storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off); 1302 + if (lastStretch.litlen > 0) { 1303 + /* last "sequence" is unfinished: just a bunch of literals */ 1304 + opt[storeEnd].litlen = lastStretch.litlen; 1305 + opt[storeEnd].mlen = 0; 1306 + storeStart = storeEnd-1; 1307 + opt[storeStart] = lastStretch; 1308 + } { 1309 + opt[storeEnd] = lastStretch; /* note: litlen will be fixed */ 1310 + storeStart = storeEnd; 1311 + } 1312 + while (1) { 1313 + ZSTD_optimal_t nextStretch = opt[stretchPos]; 1314 + opt[storeStart].litlen = nextStretch.litlen; 1315 + DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)", 1316 + opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off); 1317 + if (nextStretch.mlen == 0) { 1318 + /* reaching beginning of segment */ 1319 + break; 1320 + } 1370 1321 storeStart--; 1371 - DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", 1372 - seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off); 1373 - opt[storeStart] = opt[seqPos]; 1374 - seqPos = (seqPos > backDist) ? 
seqPos - backDist : 0; 1322 + opt[storeStart] = nextStretch; /* note: litlen will be fixed */ 1323 + assert(nextStretch.litlen + nextStretch.mlen <= stretchPos); 1324 + stretchPos -= nextStretch.litlen + nextStretch.mlen; 1375 1325 } 1376 1326 1377 1327 /* save sequences */ 1378 - DEBUGLOG(6, "sending selected sequences into seqStore") 1328 + DEBUGLOG(6, "sending selected sequences into seqStore"); 1379 1329 { U32 storePos; 1380 1330 for (storePos=storeStart; storePos <= storeEnd; storePos++) { 1381 1331 U32 const llen = opt[storePos].litlen; 1382 1332 U32 const mlen = opt[storePos].mlen; 1383 - U32 const offCode = opt[storePos].off; 1333 + U32 const offBase = opt[storePos].off; 1384 1334 U32 const advance = llen + mlen; 1385 - DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u", 1386 - anchor - istart, (unsigned)llen, (unsigned)mlen); 1335 + DEBUGLOG(6, "considering seq starting at %i, llen=%u, mlen=%u", 1336 + (int)(anchor - istart), (unsigned)llen, (unsigned)mlen); 1387 1337 1388 1338 if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */ 1389 1339 assert(storePos == storeEnd); /* must be last sequence */ ··· 1426 1308 } 1427 1309 1428 1310 assert(anchor + llen <= iend); 1429 - ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); 1430 - ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen); 1311 + ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen); 1312 + ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen); 1431 1313 anchor += advance; 1432 1314 ip = anchor; 1433 1315 } } 1316 + DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]); 1317 + 1318 + /* update all costs */ 1434 1319 ZSTD_setBasePrices(optStatePtr, optLevel); 1435 1320 } 1436 1321 } /* while (ip < ilimit) */ ··· 1441 1320 /* Return the last literals size */ 1442 1321 return (size_t)(iend - anchor); 1443 1322 } 1323 + #endif /* build exclusions */ 1444 1324 1325 + #ifndef 
ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR 1445 1326 static size_t ZSTD_compressBlock_opt0( 1446 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1327 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1447 1328 const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode) 1448 1329 { 1449 1330 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode); 1450 1331 } 1332 + #endif 1451 1333 1334 + #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR 1452 1335 static size_t ZSTD_compressBlock_opt2( 1453 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1336 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1454 1337 const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode) 1455 1338 { 1456 1339 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode); 1457 1340 } 1341 + #endif 1458 1342 1343 + #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR 1459 1344 size_t ZSTD_compressBlock_btopt( 1460 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1345 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1461 1346 const void* src, size_t srcSize) 1462 1347 { 1463 1348 DEBUGLOG(5, "ZSTD_compressBlock_btopt"); 1464 1349 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict); 1465 1350 } 1351 + #endif 1466 1352 1467 1353 1468 1354 1469 1355 1356 + #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR 1470 1357 /* ZSTD_initStats_ultra(): 1471 1358 * make a first compression pass, just to seed stats with more accurate starting values. 1472 1359 * only works on first block, with no dictionary and no ldm. 1473 - * this function cannot error, hence its contract must be respected. 1360 + * this function cannot error out, its narrow contract must be respected. 
1474 1361 */ 1475 - static void 1476 - ZSTD_initStats_ultra(ZSTD_matchState_t* ms, 1477 - seqStore_t* seqStore, 1478 - U32 rep[ZSTD_REP_NUM], 1479 - const void* src, size_t srcSize) 1362 + static 1363 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 1364 + void ZSTD_initStats_ultra(ZSTD_MatchState_t* ms, 1365 + SeqStore_t* seqStore, 1366 + U32 rep[ZSTD_REP_NUM], 1367 + const void* src, size_t srcSize) 1480 1368 { 1481 1369 U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ 1482 1370 ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep)); ··· 1498 1368 1499 1369 ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/ 1500 1370 1501 - /* invalidate first scan from history */ 1371 + /* invalidate first scan from history, only keep entropy stats */ 1502 1372 ZSTD_resetSeqStore(seqStore); 1503 1373 ms->window.base -= srcSize; 1504 1374 ms->window.dictLimit += (U32)srcSize; ··· 1508 1378 } 1509 1379 1510 1380 size_t ZSTD_compressBlock_btultra( 1511 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1381 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1512 1382 const void* src, size_t srcSize) 1513 1383 { 1514 1384 DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); ··· 1516 1386 } 1517 1387 1518 1388 size_t ZSTD_compressBlock_btultra2( 1519 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1389 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1520 1390 const void* src, size_t srcSize) 1521 1391 { 1522 1392 U32 const curr = (U32)((const BYTE*)src - ms->window.base); 1523 1393 DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); 1524 1394 1525 - /* 2-pass strategy: 1395 + /* 2-passes strategy: 1526 1396 * this strategy makes a first pass over first block to collect statistics 1527 - * and seed next round's statistics with it. 1528 - * After 1st pass, function forgets everything, and starts a new block. 
1397 + * in order to seed next round's statistics with it. 1398 + * After 1st pass, function forgets history, and starts a new block. 1529 1399 * Consequently, this can only work if no data has been previously loaded in tables, 1530 1400 * aka, no dictionary, no prefix, no ldm preprocessing. 1531 1401 * The compression ratio gain is generally small (~0.5% on first block), ··· 1534 1404 if ( (ms->opt.litLengthSum==0) /* first block */ 1535 1405 && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ 1536 1406 && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ 1537 - && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ 1538 - && (srcSize > ZSTD_PREDEF_THRESHOLD) 1407 + && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ 1408 + && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */ 1539 1409 ) { 1540 1410 ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); 1541 1411 } 1542 1412 1543 1413 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict); 1544 1414 } 1415 + #endif 1545 1416 1417 + #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR 1546 1418 size_t ZSTD_compressBlock_btopt_dictMatchState( 1547 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1419 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1548 1420 const void* src, size_t srcSize) 1549 1421 { 1550 1422 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); 1551 1423 } 1552 1424 1425 + size_t ZSTD_compressBlock_btopt_extDict( 1426 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1427 + const void* src, size_t srcSize) 1428 + { 1429 + return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict); 1430 + } 1431 + #endif 1432 + 1433 + #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR 1553 1434 size_t ZSTD_compressBlock_btultra_dictMatchState( 1554 - 
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1435 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1555 1436 const void* src, size_t srcSize) 1556 1437 { 1557 1438 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); 1558 1439 } 1559 1440 1560 - size_t ZSTD_compressBlock_btopt_extDict( 1561 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1562 - const void* src, size_t srcSize) 1563 - { 1564 - return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict); 1565 - } 1566 - 1567 1441 size_t ZSTD_compressBlock_btultra_extDict( 1568 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1442 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 1569 1443 const void* src, size_t srcSize) 1570 1444 { 1571 1445 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict); 1572 1446 } 1447 + #endif 1573 1448 1574 1449 /* note : no btultra2 variant for extDict nor dictMatchState, 1575 1450 * because btultra2 is not meant to work with dictionaries
+42 -19
lib/zstd/compress/zstd_opt.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 12 11 #ifndef ZSTD_OPT_H 13 12 #define ZSTD_OPT_H 14 13 15 - 16 14 #include "zstd_compress_internal.h" 17 15 16 + #if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \ 17 + || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \ 18 + || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) 18 19 /* used in ZSTD_loadDictionaryContent() */ 19 - void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend); 20 + void ZSTD_updateTree(ZSTD_MatchState_t* ms, const BYTE* ip, const BYTE* iend); 21 + #endif 20 22 23 + #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR 21 24 size_t ZSTD_compressBlock_btopt( 22 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 25 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 23 26 void const* src, size_t srcSize); 24 - size_t ZSTD_compressBlock_btultra( 25 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 26 - void const* src, size_t srcSize); 27 - size_t ZSTD_compressBlock_btultra2( 28 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 29 - void const* src, size_t srcSize); 30 - 31 - 32 27 size_t ZSTD_compressBlock_btopt_dictMatchState( 33 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 28 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 29 + void const* src, size_t srcSize); 30 + size_t ZSTD_compressBlock_btopt_extDict( 31 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 32 + void const* src, size_t srcSize); 33 + 34 + #define ZSTD_COMPRESSBLOCK_BTOPT ZSTD_compressBlock_btopt 35 + #define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE ZSTD_compressBlock_btopt_dictMatchState 36 + #define 
ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT ZSTD_compressBlock_btopt_extDict 37 + #else 38 + #define ZSTD_COMPRESSBLOCK_BTOPT NULL 39 + #define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE NULL 40 + #define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT NULL 41 + #endif 42 + 43 + #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR 44 + size_t ZSTD_compressBlock_btultra( 45 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 34 46 void const* src, size_t srcSize); 35 47 size_t ZSTD_compressBlock_btultra_dictMatchState( 36 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 37 - void const* src, size_t srcSize); 38 - 39 - size_t ZSTD_compressBlock_btopt_extDict( 40 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 48 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 41 49 void const* src, size_t srcSize); 42 50 size_t ZSTD_compressBlock_btultra_extDict( 43 - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 51 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 44 52 void const* src, size_t srcSize); 45 53 46 54 /* note : no btultra2 variant for extDict nor dictMatchState, 47 55 * because btultra2 is not meant to work with dictionaries 48 56 * and is only specific for the first block (no prefix) */ 57 + size_t ZSTD_compressBlock_btultra2( 58 + ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 59 + void const* src, size_t srcSize); 49 60 61 + #define ZSTD_COMPRESSBLOCK_BTULTRA ZSTD_compressBlock_btultra 62 + #define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE ZSTD_compressBlock_btultra_dictMatchState 63 + #define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT ZSTD_compressBlock_btultra_extDict 64 + #define ZSTD_COMPRESSBLOCK_BTULTRA2 ZSTD_compressBlock_btultra2 65 + #else 66 + #define ZSTD_COMPRESSBLOCK_BTULTRA NULL 67 + #define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE NULL 68 + #define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT NULL 69 + #define ZSTD_COMPRESSBLOCK_BTULTRA2 NULL 70 + #endif 50 71 
51 72 #endif /* ZSTD_OPT_H */
+239
lib/zstd/compress/zstd_preSplit.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 2 + /* 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 4 + * All rights reserved. 5 + * 6 + * This source code is licensed under both the BSD-style license (found in the 7 + * LICENSE file in the root directory of this source tree) and the GPLv2 (found 8 + * in the COPYING file in the root directory of this source tree). 9 + * You may select, at your option, one of the above-listed licenses. 10 + */ 11 + 12 + #include "../common/compiler.h" /* ZSTD_ALIGNOF */ 13 + #include "../common/mem.h" /* S64 */ 14 + #include "../common/zstd_deps.h" /* ZSTD_memset */ 15 + #include "../common/zstd_internal.h" /* ZSTD_STATIC_ASSERT */ 16 + #include "hist.h" /* HIST_add */ 17 + #include "zstd_preSplit.h" 18 + 19 + 20 + #define BLOCKSIZE_MIN 3500 21 + #define THRESHOLD_PENALTY_RATE 16 22 + #define THRESHOLD_BASE (THRESHOLD_PENALTY_RATE - 2) 23 + #define THRESHOLD_PENALTY 3 24 + 25 + #define HASHLENGTH 2 26 + #define HASHLOG_MAX 10 27 + #define HASHTABLESIZE (1 << HASHLOG_MAX) 28 + #define HASHMASK (HASHTABLESIZE - 1) 29 + #define KNUTH 0x9e3779b9 30 + 31 + /* for hashLog > 8, hash 2 bytes. 32 + * for hashLog == 8, just take the byte, no hashing. 
33 + * The speed of this method relies on compile-time constant propagation */ 34 + FORCE_INLINE_TEMPLATE unsigned hash2(const void *p, unsigned hashLog) 35 + { 36 + assert(hashLog >= 8); 37 + if (hashLog == 8) return (U32)((const BYTE*)p)[0]; 38 + assert(hashLog <= HASHLOG_MAX); 39 + return (U32)(MEM_read16(p)) * KNUTH >> (32 - hashLog); 40 + } 41 + 42 + 43 + typedef struct { 44 + unsigned events[HASHTABLESIZE]; 45 + size_t nbEvents; 46 + } Fingerprint; 47 + typedef struct { 48 + Fingerprint pastEvents; 49 + Fingerprint newEvents; 50 + } FPStats; 51 + 52 + static void initStats(FPStats* fpstats) 53 + { 54 + ZSTD_memset(fpstats, 0, sizeof(FPStats)); 55 + } 56 + 57 + FORCE_INLINE_TEMPLATE void 58 + addEvents_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog) 59 + { 60 + const char* p = (const char*)src; 61 + size_t limit = srcSize - HASHLENGTH + 1; 62 + size_t n; 63 + assert(srcSize >= HASHLENGTH); 64 + for (n = 0; n < limit; n+=samplingRate) { 65 + fp->events[hash2(p+n, hashLog)]++; 66 + } 67 + fp->nbEvents += limit/samplingRate; 68 + } 69 + 70 + FORCE_INLINE_TEMPLATE void 71 + recordFingerprint_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog) 72 + { 73 + ZSTD_memset(fp, 0, sizeof(unsigned) * ((size_t)1 << hashLog)); 74 + fp->nbEvents = 0; 75 + addEvents_generic(fp, src, srcSize, samplingRate, hashLog); 76 + } 77 + 78 + typedef void (*RecordEvents_f)(Fingerprint* fp, const void* src, size_t srcSize); 79 + 80 + #define FP_RECORD(_rate) ZSTD_recordFingerprint_##_rate 81 + 82 + #define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize) \ 83 + static void FP_RECORD(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \ 84 + { \ 85 + recordFingerprint_generic(fp, src, srcSize, _rate, _hSize); \ 86 + } 87 + 88 + ZSTD_GEN_RECORD_FINGERPRINT(1, 10) 89 + ZSTD_GEN_RECORD_FINGERPRINT(5, 10) 90 + ZSTD_GEN_RECORD_FINGERPRINT(11, 9) 91 + ZSTD_GEN_RECORD_FINGERPRINT(43, 8) 92 + 93 + 94 + 
static U64 abs64(S64 s64) { return (U64)((s64 < 0) ? -s64 : s64); } 95 + 96 + static U64 fpDistance(const Fingerprint* fp1, const Fingerprint* fp2, unsigned hashLog) 97 + { 98 + U64 distance = 0; 99 + size_t n; 100 + assert(hashLog <= HASHLOG_MAX); 101 + for (n = 0; n < ((size_t)1 << hashLog); n++) { 102 + distance += 103 + abs64((S64)fp1->events[n] * (S64)fp2->nbEvents - (S64)fp2->events[n] * (S64)fp1->nbEvents); 104 + } 105 + return distance; 106 + } 107 + 108 + /* Compare newEvents with pastEvents 109 + * return 1 when considered "too different" 110 + */ 111 + static int compareFingerprints(const Fingerprint* ref, 112 + const Fingerprint* newfp, 113 + int penalty, 114 + unsigned hashLog) 115 + { 116 + assert(ref->nbEvents > 0); 117 + assert(newfp->nbEvents > 0); 118 + { U64 p50 = (U64)ref->nbEvents * (U64)newfp->nbEvents; 119 + U64 deviation = fpDistance(ref, newfp, hashLog); 120 + U64 threshold = p50 * (U64)(THRESHOLD_BASE + penalty) / THRESHOLD_PENALTY_RATE; 121 + return deviation >= threshold; 122 + } 123 + } 124 + 125 + static void mergeEvents(Fingerprint* acc, const Fingerprint* newfp) 126 + { 127 + size_t n; 128 + for (n = 0; n < HASHTABLESIZE; n++) { 129 + acc->events[n] += newfp->events[n]; 130 + } 131 + acc->nbEvents += newfp->nbEvents; 132 + } 133 + 134 + static void flushEvents(FPStats* fpstats) 135 + { 136 + size_t n; 137 + for (n = 0; n < HASHTABLESIZE; n++) { 138 + fpstats->pastEvents.events[n] = fpstats->newEvents.events[n]; 139 + } 140 + fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents; 141 + ZSTD_memset(&fpstats->newEvents, 0, sizeof(fpstats->newEvents)); 142 + } 143 + 144 + static void removeEvents(Fingerprint* acc, const Fingerprint* slice) 145 + { 146 + size_t n; 147 + for (n = 0; n < HASHTABLESIZE; n++) { 148 + assert(acc->events[n] >= slice->events[n]); 149 + acc->events[n] -= slice->events[n]; 150 + } 151 + acc->nbEvents -= slice->nbEvents; 152 + } 153 + 154 + #define CHUNKSIZE (8 << 10) 155 + static size_t 
ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize, 156 + int level, 157 + void* workspace, size_t wkspSize) 158 + { 159 + static const RecordEvents_f records_fs[] = { 160 + FP_RECORD(43), FP_RECORD(11), FP_RECORD(5), FP_RECORD(1) 161 + }; 162 + static const unsigned hashParams[] = { 8, 9, 10, 10 }; 163 + const RecordEvents_f record_f = (assert(0<=level && level<=3), records_fs[level]); 164 + FPStats* const fpstats = (FPStats*)workspace; 165 + const char* p = (const char*)blockStart; 166 + int penalty = THRESHOLD_PENALTY; 167 + size_t pos = 0; 168 + assert(blockSize == (128 << 10)); 169 + assert(workspace != NULL); 170 + assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0); 171 + ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats)); 172 + assert(wkspSize >= sizeof(FPStats)); (void)wkspSize; 173 + 174 + initStats(fpstats); 175 + record_f(&fpstats->pastEvents, p, CHUNKSIZE); 176 + for (pos = CHUNKSIZE; pos <= blockSize - CHUNKSIZE; pos += CHUNKSIZE) { 177 + record_f(&fpstats->newEvents, p + pos, CHUNKSIZE); 178 + if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty, hashParams[level])) { 179 + return pos; 180 + } else { 181 + mergeEvents(&fpstats->pastEvents, &fpstats->newEvents); 182 + if (penalty > 0) penalty--; 183 + } 184 + } 185 + assert(pos == blockSize); 186 + return blockSize; 187 + (void)flushEvents; (void)removeEvents; 188 + } 189 + 190 + /* ZSTD_splitBlock_fromBorders(): very fast strategy : 191 + * compare fingerprint from beginning and end of the block, 192 + * derive from their difference if it's preferable to split in the middle, 193 + * repeat the process a second time, for finer grained decision. 194 + * 3 times did not brought improvements, so I stopped at 2. 195 + * Benefits are good enough for a cheap heuristic. 196 + * More accurate splitting saves more, but speed impact is also more perceptible. 197 + * For better accuracy, use more elaborate variant *_byChunks. 
198 + */ 199 + static size_t ZSTD_splitBlock_fromBorders(const void* blockStart, size_t blockSize, 200 + void* workspace, size_t wkspSize) 201 + { 202 + #define SEGMENT_SIZE 512 203 + FPStats* const fpstats = (FPStats*)workspace; 204 + Fingerprint* middleEvents = (Fingerprint*)(void*)((char*)workspace + 512 * sizeof(unsigned)); 205 + assert(blockSize == (128 << 10)); 206 + assert(workspace != NULL); 207 + assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0); 208 + ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats)); 209 + assert(wkspSize >= sizeof(FPStats)); (void)wkspSize; 210 + 211 + initStats(fpstats); 212 + HIST_add(fpstats->pastEvents.events, blockStart, SEGMENT_SIZE); 213 + HIST_add(fpstats->newEvents.events, (const char*)blockStart + blockSize - SEGMENT_SIZE, SEGMENT_SIZE); 214 + fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents = SEGMENT_SIZE; 215 + if (!compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, 0, 8)) 216 + return blockSize; 217 + 218 + HIST_add(middleEvents->events, (const char*)blockStart + blockSize/2 - SEGMENT_SIZE/2, SEGMENT_SIZE); 219 + middleEvents->nbEvents = SEGMENT_SIZE; 220 + { U64 const distFromBegin = fpDistance(&fpstats->pastEvents, middleEvents, 8); 221 + U64 const distFromEnd = fpDistance(&fpstats->newEvents, middleEvents, 8); 222 + U64 const minDistance = SEGMENT_SIZE * SEGMENT_SIZE / 3; 223 + if (abs64((S64)distFromBegin - (S64)distFromEnd) < minDistance) 224 + return 64 KB; 225 + return (distFromBegin > distFromEnd) ? 
32 KB : 96 KB; 226 + } 227 + } 228 + 229 + size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize, 230 + int level, 231 + void* workspace, size_t wkspSize) 232 + { 233 + DEBUGLOG(6, "ZSTD_splitBlock (level=%i)", level); 234 + assert(0<=level && level<=4); 235 + if (level == 0) 236 + return ZSTD_splitBlock_fromBorders(blockStart, blockSize, workspace, wkspSize); 237 + /* level >= 1*/ 238 + return ZSTD_splitBlock_byChunks(blockStart, blockSize, level-1, workspace, wkspSize); 239 + }
+34
lib/zstd/compress/zstd_preSplit.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 2 + /* 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 4 + * All rights reserved. 5 + * 6 + * This source code is licensed under both the BSD-style license (found in the 7 + * LICENSE file in the root directory of this source tree) and the GPLv2 (found 8 + * in the COPYING file in the root directory of this source tree). 9 + * You may select, at your option, one of the above-listed licenses. 10 + */ 11 + 12 + #ifndef ZSTD_PRESPLIT_H 13 + #define ZSTD_PRESPLIT_H 14 + 15 + #include <linux/types.h> /* size_t */ 16 + 17 + #define ZSTD_SLIPBLOCK_WORKSPACESIZE 8208 18 + 19 + /* ZSTD_splitBlock(): 20 + * @level must be a value between 0 and 4. 21 + * higher levels spend more energy to detect block boundaries. 22 + * @workspace must be aligned for size_t. 23 + * @wkspSize must be at least >= ZSTD_SLIPBLOCK_WORKSPACESIZE 24 + * note: 25 + * For the time being, this function only accepts full 128 KB blocks. 26 + * Therefore, @blockSize must be == 128 KB. 27 + * While this could be extended to smaller sizes in the future, 28 + * it is not yet clear if this would be useful. TBD. 29 + */ 30 + size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize, 31 + int level, 32 + void* workspace, size_t wkspSize); 33 + 34 + #endif /* ZSTD_PRESPLIT_H */
+553 -352
lib/zstd/decompress/huf_decompress.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 1 2 /* ****************************************************************** 2 3 * huff0 huffman decoder, 3 4 * part of Finite State Entropy library 4 - * Copyright (c) Yann Collet, Facebook, Inc. 5 + * Copyright (c) Meta Platforms, Inc. and affiliates. 5 6 * 6 7 * You can contact the author at : 7 8 * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy ··· 20 19 #include "../common/compiler.h" 21 20 #include "../common/bitstream.h" /* BIT_* */ 22 21 #include "../common/fse.h" /* to compress headers */ 23 - #define HUF_STATIC_LINKING_ONLY 24 22 #include "../common/huf.h" 25 23 #include "../common/error_private.h" 26 24 #include "../common/zstd_internal.h" 25 + #include "../common/bits.h" /* ZSTD_highbit32, ZSTD_countTrailingZeros64 */ 27 26 28 27 /* ************************************************************** 29 28 * Constants ··· 35 34 * Macros 36 35 ****************************************************************/ 37 36 37 + #ifdef HUF_DISABLE_FAST_DECODE 38 + # define HUF_ENABLE_FAST_DECODE 0 39 + #else 40 + # define HUF_ENABLE_FAST_DECODE 1 41 + #endif 42 + 38 43 /* These two optional macros force the use one way or another of the two 39 44 * Huffman decompression implementations. You can't force in both directions 40 45 * at the same time. ··· 50 43 #error "Cannot force the use of the X1 and X2 decoders at the same time!" 51 44 #endif 52 45 53 - #if ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2 54 - # define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE 46 + /* When DYNAMIC_BMI2 is enabled, fast decoders are only called when bmi2 is 47 + * supported at runtime, so we can add the BMI2 target attribute. 48 + * When it is disabled, we will still get BMI2 if it is enabled statically. 
49 + */ 50 + #if DYNAMIC_BMI2 51 + # define HUF_FAST_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE 55 52 #else 56 - # define HUF_ASM_X86_64_BMI2_ATTRS 53 + # define HUF_FAST_BMI2_ATTRS 57 54 #endif 58 55 59 56 #define HUF_EXTERN_C 60 57 #define HUF_ASM_DECL HUF_EXTERN_C 61 58 62 - #if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)) 59 + #if DYNAMIC_BMI2 63 60 # define HUF_NEED_BMI2_FUNCTION 1 64 61 #else 65 62 # define HUF_NEED_BMI2_FUNCTION 0 66 - #endif 67 - 68 - #if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)) 69 - # define HUF_NEED_DEFAULT_FUNCTION 1 70 - #else 71 - # define HUF_NEED_DEFAULT_FUNCTION 0 72 63 #endif 73 64 74 65 /* ************************************************************** ··· 85 80 /* ************************************************************** 86 81 * BMI2 Variant Wrappers 87 82 ****************************************************************/ 83 + typedef size_t (*HUF_DecompressUsingDTableFn)(void *dst, size_t dstSize, 84 + const void *cSrc, 85 + size_t cSrcSize, 86 + const HUF_DTable *DTable); 87 + 88 88 #if DYNAMIC_BMI2 89 89 90 90 #define HUF_DGEN(fn) \ ··· 111 101 } \ 112 102 \ 113 103 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ 114 - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ 104 + size_t cSrcSize, HUF_DTable const* DTable, int flags) \ 115 105 { \ 116 - if (bmi2) { \ 106 + if (flags & HUF_flags_bmi2) { \ 117 107 return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ 118 108 } \ 119 109 return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ ··· 123 113 124 114 #define HUF_DGEN(fn) \ 125 115 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ 126 - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ 116 + size_t cSrcSize, HUF_DTable const* DTable, int flags) \ 127 117 { \ 128 - (void)bmi2; \ 118 + (void)flags; \ 129 119 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ 130 120 } 131 121 ··· 144 134 return dtd; 145 135 } 146 136 147 - #if 
ZSTD_ENABLE_ASM_X86_64_BMI2 148 - 149 - static size_t HUF_initDStream(BYTE const* ip) { 137 + static size_t HUF_initFastDStream(BYTE const* ip) { 150 138 BYTE const lastByte = ip[7]; 151 - size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; 139 + size_t const bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; 152 140 size_t const value = MEM_readLEST(ip) | 1; 153 141 assert(bitsConsumed <= 8); 142 + assert(sizeof(size_t) == 8); 154 143 return value << bitsConsumed; 155 144 } 145 + 146 + 147 + /* 148 + * The input/output arguments to the Huffman fast decoding loop: 149 + * 150 + * ip [in/out] - The input pointers, must be updated to reflect what is consumed. 151 + * op [in/out] - The output pointers, must be updated to reflect what is written. 152 + * bits [in/out] - The bitstream containers, must be updated to reflect the current state. 153 + * dt [in] - The decoding table. 154 + * ilowest [in] - The beginning of the valid range of the input. Decoders may read 155 + * down to this pointer. It may be below iend[0]. 156 + * oend [in] - The end of the output stream. op[3] must not cross oend. 157 + * iend [in] - The end of each input stream. ip[i] may cross iend[i], 158 + * as long as it is above ilowest, but that indicates corruption. 159 + */ 156 160 typedef struct { 157 161 BYTE const* ip[4]; 158 162 BYTE* op[4]; 159 163 U64 bits[4]; 160 164 void const* dt; 161 - BYTE const* ilimit; 165 + BYTE const* ilowest; 162 166 BYTE* oend; 163 167 BYTE const* iend[4]; 164 - } HUF_DecompressAsmArgs; 168 + } HUF_DecompressFastArgs; 169 + 170 + typedef void (*HUF_DecompressFastLoopFn)(HUF_DecompressFastArgs*); 165 171 166 172 /* 167 - * Initializes args for the asm decoding loop. 168 - * @returns 0 on success 169 - * 1 if the fallback implementation should be used. 173 + * Initializes args for the fast decoding loop. 174 + * @returns 1 on success 175 + * 0 if the fallback implementation should be used. 170 176 * Or an error code on failure. 
171 177 */ 172 - static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable) 178 + static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable) 173 179 { 174 180 void const* dt = DTable + 1; 175 181 U32 const dtLog = HUF_getDTableDesc(DTable).tableLog; 176 182 177 - const BYTE* const ilimit = (const BYTE*)src + 6 + 8; 183 + const BYTE* const istart = (const BYTE*)src; 178 184 179 - BYTE* const oend = (BYTE*)dst + dstSize; 185 + BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize); 180 186 181 - /* The following condition is false on x32 platform, 182 - * but HUF_asm is not compatible with this ABI */ 183 - if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1; 187 + /* The fast decoding loop assumes 64-bit little-endian. 188 + * This condition is false on x32. 189 + */ 190 + if (!MEM_isLittleEndian() || MEM_32bits()) 191 + return 0; 192 + 193 + /* Avoid nullptr addition */ 194 + if (dstSize == 0) 195 + return 0; 196 + assert(dst != NULL); 184 197 185 198 /* strict minimum : jump table + 1 byte per stream */ 186 199 if (srcSize < 10) ··· 214 181 * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder. 215 182 */ 216 183 if (dtLog != HUF_DECODER_FAST_TABLELOG) 217 - return 1; 184 + return 0; 218 185 219 186 /* Read the jump table. 
*/ 220 187 { 221 - const BYTE* const istart = (const BYTE*)src; 222 188 size_t const length1 = MEM_readLE16(istart); 223 189 size_t const length2 = MEM_readLE16(istart+2); 224 190 size_t const length3 = MEM_readLE16(istart+4); ··· 227 195 args->iend[2] = args->iend[1] + length2; 228 196 args->iend[3] = args->iend[2] + length3; 229 197 230 - /* HUF_initDStream() requires this, and this small of an input 198 + /* HUF_initFastDStream() requires this, and this small of an input 231 199 * won't benefit from the ASM loop anyways. 232 - * length1 must be >= 16 so that ip[0] >= ilimit before the loop 233 - * starts. 234 200 */ 235 - if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8) 236 - return 1; 201 + if (length1 < 8 || length2 < 8 || length3 < 8 || length4 < 8) 202 + return 0; 237 203 if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */ 238 204 } 239 205 /* ip[] contains the position that is currently loaded into bits[]. */ ··· 248 218 249 219 /* No point to call the ASM loop for tiny outputs. */ 250 220 if (args->op[3] >= oend) 251 - return 1; 221 + return 0; 252 222 253 223 /* bits[] is the bit container. 254 224 * It is read from the MSB down to the LSB. ··· 257 227 * set, so that CountTrailingZeros(bits[]) can be used 258 228 * to count how many bits we've consumed. 259 229 */ 260 - args->bits[0] = HUF_initDStream(args->ip[0]); 261 - args->bits[1] = HUF_initDStream(args->ip[1]); 262 - args->bits[2] = HUF_initDStream(args->ip[2]); 263 - args->bits[3] = HUF_initDStream(args->ip[3]); 230 + args->bits[0] = HUF_initFastDStream(args->ip[0]); 231 + args->bits[1] = HUF_initFastDStream(args->ip[1]); 232 + args->bits[2] = HUF_initFastDStream(args->ip[2]); 233 + args->bits[3] = HUF_initFastDStream(args->ip[3]); 264 234 265 - /* If ip[] >= ilimit, it is guaranteed to be safe to 266 - * reload bits[]. It may be beyond its section, but is 267 - * guaranteed to be valid (>= istart). 
268 - */ 269 - args->ilimit = ilimit; 235 + /* The decoders must be sure to never read beyond ilowest. 236 + * This is lower than iend[0], but allowing decoders to read 237 + * down to ilowest can allow an extra iteration or two in the 238 + * fast loop. 239 + */ 240 + args->ilowest = istart; 270 241 271 242 args->oend = oend; 272 243 args->dt = dt; 273 244 274 - return 0; 245 + return 1; 275 246 } 276 247 277 - static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd) 248 + static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArgs const* args, int stream, BYTE* segmentEnd) 278 249 { 279 250 /* Validate that we haven't overwritten. */ 280 251 if (args->op[stream] > segmentEnd) ··· 289 258 return ERROR(corruption_detected); 290 259 291 260 /* Construct the BIT_DStream_t. */ 292 - bit->bitContainer = MEM_readLE64(args->ip[stream]); 293 - bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]); 294 - bit->start = (const char*)args->iend[0]; 261 + assert(sizeof(size_t) == 8); 262 + bit->bitContainer = MEM_readLEST(args->ip[stream]); 263 + bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]); 264 + bit->start = (const char*)args->ilowest; 295 265 bit->limitPtr = bit->start + sizeof(size_t); 296 266 bit->ptr = (const char*)args->ip[stream]; 297 267 298 268 return 0; 299 269 } 300 - #endif 270 + 271 + /* Calls X(N) for each stream 0, 1, 2, 3. */ 272 + #define HUF_4X_FOR_EACH_STREAM(X) \ 273 + do { \ 274 + X(0); \ 275 + X(1); \ 276 + X(2); \ 277 + X(3); \ 278 + } while (0) 279 + 280 + /* Calls X(N, var) for each stream 0, 1, 2, 3. 
*/ 281 + #define HUF_4X_FOR_EACH_STREAM_WITH_VAR(X, var) \ 282 + do { \ 283 + X(0, (var)); \ 284 + X(1, (var)); \ 285 + X(2, (var)); \ 286 + X(3, (var)); \ 287 + } while (0) 301 288 302 289 303 290 #ifndef HUF_FORCE_DECOMPRESS_X2 ··· 332 283 static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { 333 284 U64 D4; 334 285 if (MEM_isLittleEndian()) { 335 - D4 = (symbol << 8) + nbBits; 286 + D4 = (U64)((symbol << 8) + nbBits); 336 287 } else { 337 - D4 = symbol + (nbBits << 8); 288 + D4 = (U64)(symbol + (nbBits << 8)); 338 289 } 290 + assert(D4 < (1U << 16)); 339 291 D4 *= 0x0001000100010001ULL; 340 292 return D4; 341 293 } ··· 379 329 BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; 380 330 } HUF_ReadDTableX1_Workspace; 381 331 382 - 383 - size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) 384 - { 385 - return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); 386 - } 387 - 388 - size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2) 332 + size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags) 389 333 { 390 334 U32 tableLog = 0; 391 335 U32 nbSymbols = 0; ··· 394 350 DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); 395 351 /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... 
*/ 396 352 397 - iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2); 353 + iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), flags); 398 354 if (HUF_isError(iSize)) return iSize; 399 355 400 356 ··· 421 377 * rankStart[0] is not filled because there are no entries in the table for 422 378 * weight 0. 423 379 */ 424 - { 425 - int n; 426 - int nextRankStart = 0; 380 + { int n; 381 + U32 nextRankStart = 0; 427 382 int const unroll = 4; 428 383 int const nLimit = (int)nbSymbols - unroll + 1; 429 384 for (n=0; n<(int)tableLog+1; n++) { ··· 449 406 * We can switch based on the length to a different inner loop which is 450 407 * optimized for that particular case. 451 408 */ 452 - { 453 - U32 w; 454 - int symbol=wksp->rankVal[0]; 455 - int rankStart=0; 409 + { U32 w; 410 + int symbol = wksp->rankVal[0]; 411 + int rankStart = 0; 456 412 for (w=1; w<tableLog+1; ++w) { 457 413 int const symbolCount = wksp->rankVal[w]; 458 414 int const length = (1 << w) >> 1; ··· 525 483 } 526 484 527 485 #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ 528 - *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) 486 + do { *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog); } while (0) 529 487 530 - #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ 531 - if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ 532 - HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) 488 + #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ 489 + do { \ 490 + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ 491 + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \ 492 + } while (0) 533 493 534 - #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ 535 - if (MEM_64bits()) \ 536 - HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) 494 + #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ 495 + do { \ 496 + if (MEM_64bits()) \ 497 + 
HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \ 498 + } while (0) 537 499 538 500 HINT_INLINE size_t 539 501 HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) ··· 565 519 while (p < pEnd) 566 520 HUF_DECODE_SYMBOLX1_0(p, bitDPtr); 567 521 568 - return pEnd-pStart; 522 + return (size_t)(pEnd-pStart); 569 523 } 570 524 571 525 FORCE_INLINE_TEMPLATE size_t ··· 575 529 const HUF_DTable* DTable) 576 530 { 577 531 BYTE* op = (BYTE*)dst; 578 - BYTE* const oend = op + dstSize; 532 + BYTE* const oend = ZSTD_maybeNullPtrAdd(op, dstSize); 579 533 const void* dtPtr = DTable + 1; 580 534 const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; 581 535 BIT_DStream_t bitD; ··· 591 545 return dstSize; 592 546 } 593 547 548 + /* HUF_decompress4X1_usingDTable_internal_body(): 549 + * Conditions : 550 + * @dstSize >= 6 551 + */ 594 552 FORCE_INLINE_TEMPLATE size_t 595 553 HUF_decompress4X1_usingDTable_internal_body( 596 554 void* dst, size_t dstSize, ··· 603 553 { 604 554 /* Check */ 605 555 if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ 556 + if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */ 606 557 607 558 { const BYTE* const istart = (const BYTE*) cSrc; 608 559 BYTE* const ostart = (BYTE*) dst; ··· 639 588 640 589 if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ 641 590 if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ 591 + assert(dstSize >= 6); /* validated above */ 642 592 CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); 643 593 CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); 644 594 CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); ··· 702 650 } 703 651 #endif 704 652 705 - #if HUF_NEED_DEFAULT_FUNCTION 706 653 static 707 654 size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc, 708 655 size_t cSrcSize, HUF_DTable 
const* DTable) { 709 656 return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); 710 657 } 711 - #endif 712 658 713 659 #if ZSTD_ENABLE_ASM_X86_64_BMI2 714 660 715 - HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN; 661 + HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN; 716 662 717 - static HUF_ASM_X86_64_BMI2_ATTRS 718 - size_t 719 - HUF_decompress4X1_usingDTable_internal_bmi2_asm( 720 - void* dst, size_t dstSize, 721 - const void* cSrc, size_t cSrcSize, 722 - const HUF_DTable* DTable) 663 + #endif 664 + 665 + static HUF_FAST_BMI2_ATTRS 666 + void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args) 723 667 { 724 - void const* dt = DTable + 1; 725 - const BYTE* const iend = (const BYTE*)cSrc + 6; 726 - BYTE* const oend = (BYTE*)dst + dstSize; 727 - HUF_DecompressAsmArgs args; 728 - { 729 - size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); 730 - FORWARD_IF_ERROR(ret, "Failed to init asm args"); 731 - if (ret != 0) 732 - return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); 668 + U64 bits[4]; 669 + BYTE const* ip[4]; 670 + BYTE* op[4]; 671 + U16 const* const dtable = (U16 const*)args->dt; 672 + BYTE* const oend = args->oend; 673 + BYTE const* const ilowest = args->ilowest; 674 + 675 + /* Copy the arguments to local variables */ 676 + ZSTD_memcpy(&bits, &args->bits, sizeof(bits)); 677 + ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip)); 678 + ZSTD_memcpy(&op, &args->op, sizeof(op)); 679 + 680 + assert(MEM_isLittleEndian()); 681 + assert(!MEM_32bits()); 682 + 683 + for (;;) { 684 + BYTE* olimit; 685 + int stream; 686 + 687 + /* Assert loop preconditions */ 688 + #ifndef NDEBUG 689 + for (stream = 0; stream < 4; ++stream) { 690 + assert(op[stream] <= (stream == 3 ? 
oend : op[stream + 1])); 691 + assert(ip[stream] >= ilowest); 692 + } 693 + #endif 694 + /* Compute olimit */ 695 + { 696 + /* Each iteration produces 5 output symbols per stream */ 697 + size_t const oiters = (size_t)(oend - op[3]) / 5; 698 + /* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes 699 + * per stream. 700 + */ 701 + size_t const iiters = (size_t)(ip[0] - ilowest) / 7; 702 + /* We can safely run iters iterations before running bounds checks */ 703 + size_t const iters = MIN(oiters, iiters); 704 + size_t const symbols = iters * 5; 705 + 706 + /* We can simply check that op[3] < olimit, instead of checking all 707 + * of our bounds, since we can't hit the other bounds until we've run 708 + * iters iterations, which only happens when op[3] == olimit. 709 + */ 710 + olimit = op[3] + symbols; 711 + 712 + /* Exit fast decoding loop once we reach the end. */ 713 + if (op[3] == olimit) 714 + break; 715 + 716 + /* Exit the decoding loop if any input pointer has crossed the 717 + * previous one. This indicates corruption, and a precondition 718 + * to our loop is that ip[i] >= ip[0]. 
719 + */ 720 + for (stream = 1; stream < 4; ++stream) { 721 + if (ip[stream] < ip[stream - 1]) 722 + goto _out; 723 + } 724 + } 725 + 726 + #ifndef NDEBUG 727 + for (stream = 1; stream < 4; ++stream) { 728 + assert(ip[stream] >= ip[stream - 1]); 729 + } 730 + #endif 731 + 732 + #define HUF_4X1_DECODE_SYMBOL(_stream, _symbol) \ 733 + do { \ 734 + int const index = (int)(bits[(_stream)] >> 53); \ 735 + int const entry = (int)dtable[index]; \ 736 + bits[(_stream)] <<= (entry & 0x3F); \ 737 + op[(_stream)][(_symbol)] = (BYTE)((entry >> 8) & 0xFF); \ 738 + } while (0) 739 + 740 + #define HUF_4X1_RELOAD_STREAM(_stream) \ 741 + do { \ 742 + int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \ 743 + int const nbBits = ctz & 7; \ 744 + int const nbBytes = ctz >> 3; \ 745 + op[(_stream)] += 5; \ 746 + ip[(_stream)] -= nbBytes; \ 747 + bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \ 748 + bits[(_stream)] <<= nbBits; \ 749 + } while (0) 750 + 751 + /* Manually unroll the loop because compilers don't consistently 752 + * unroll the inner loops, which destroys performance. 753 + */ 754 + do { 755 + /* Decode 5 symbols in each of the 4 streams */ 756 + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 0); 757 + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 1); 758 + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2); 759 + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3); 760 + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4); 761 + 762 + /* Reload each of the 4 the bitstreams */ 763 + HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM); 764 + } while (op[3] < olimit); 765 + 766 + #undef HUF_4X1_DECODE_SYMBOL 767 + #undef HUF_4X1_RELOAD_STREAM 733 768 } 734 769 735 - assert(args.ip[0] >= args.ilimit); 736 - HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args); 770 + _out: 737 771 738 - /* Our loop guarantees that ip[] >= ilimit and that we haven't 772 + /* Save the final values of each of the state variables back to args. 
*/ 773 + ZSTD_memcpy(&args->bits, &bits, sizeof(bits)); 774 + ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip)); 775 + ZSTD_memcpy(&args->op, &op, sizeof(op)); 776 + } 777 + 778 + /* 779 + * @returns @p dstSize on success (>= 6) 780 + * 0 if the fallback implementation should be used 781 + * An error if an error occurred 782 + */ 783 + static HUF_FAST_BMI2_ATTRS 784 + size_t 785 + HUF_decompress4X1_usingDTable_internal_fast( 786 + void* dst, size_t dstSize, 787 + const void* cSrc, size_t cSrcSize, 788 + const HUF_DTable* DTable, 789 + HUF_DecompressFastLoopFn loopFn) 790 + { 791 + void const* dt = DTable + 1; 792 + BYTE const* const ilowest = (BYTE const*)cSrc; 793 + BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize); 794 + HUF_DecompressFastArgs args; 795 + { size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); 796 + FORWARD_IF_ERROR(ret, "Failed to init fast loop args"); 797 + if (ret == 0) 798 + return 0; 799 + } 800 + 801 + assert(args.ip[0] >= args.ilowest); 802 + loopFn(&args); 803 + 804 + /* Our loop guarantees that ip[] >= ilowest and that we haven't 739 805 * overwritten any op[]. 740 806 */ 741 - assert(args.ip[0] >= iend); 742 - assert(args.ip[1] >= iend); 743 - assert(args.ip[2] >= iend); 744 - assert(args.ip[3] >= iend); 807 + assert(args.ip[0] >= ilowest); 808 + assert(args.ip[0] >= ilowest); 809 + assert(args.ip[1] >= ilowest); 810 + assert(args.ip[2] >= ilowest); 811 + assert(args.ip[3] >= ilowest); 745 812 assert(args.op[3] <= oend); 746 - (void)iend; 813 + 814 + assert(ilowest == args.ilowest); 815 + assert(ilowest + 6 == args.iend[0]); 816 + (void)ilowest; 747 817 748 818 /* finish bit streams one by one. 
*/ 749 - { 750 - size_t const segmentSize = (dstSize+3) / 4; 819 + { size_t const segmentSize = (dstSize+3) / 4; 751 820 BYTE* segmentEnd = (BYTE*)dst; 752 821 int i; 753 822 for (i = 0; i < 4; ++i) { ··· 885 712 } 886 713 887 714 /* decoded size */ 715 + assert(dstSize != 0); 888 716 return dstSize; 889 717 } 890 - #endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */ 891 - 892 - typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, 893 - const void *cSrc, 894 - size_t cSrcSize, 895 - const HUF_DTable *DTable); 896 718 897 719 HUF_DGEN(HUF_decompress1X1_usingDTable_internal) 898 720 899 721 static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc, 900 - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) 722 + size_t cSrcSize, HUF_DTable const* DTable, int flags) 901 723 { 724 + HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X1_usingDTable_internal_default; 725 + HUF_DecompressFastLoopFn loopFn = HUF_decompress4X1_usingDTable_internal_fast_c_loop; 726 + 902 727 #if DYNAMIC_BMI2 903 - if (bmi2) { 728 + if (flags & HUF_flags_bmi2) { 729 + fallbackFn = HUF_decompress4X1_usingDTable_internal_bmi2; 904 730 # if ZSTD_ENABLE_ASM_X86_64_BMI2 905 - return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); 906 - # else 907 - return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); 731 + if (!(flags & HUF_flags_disableAsm)) { 732 + loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop; 733 + } 908 734 # endif 735 + } else { 736 + return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable); 909 737 } 910 - #else 911 - (void)bmi2; 912 738 #endif 913 739 914 740 #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__) 915 - return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); 916 - #else 917 - return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable); 741 + if (!(flags & 
HUF_flags_disableAsm)) { 742 + loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop; 743 + } 918 744 #endif 745 + 746 + if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) { 747 + size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn); 748 + if (ret != 0) 749 + return ret; 750 + } 751 + return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable); 919 752 } 920 753 921 - 922 - size_t HUF_decompress1X1_usingDTable( 923 - void* dst, size_t dstSize, 924 - const void* cSrc, size_t cSrcSize, 925 - const HUF_DTable* DTable) 926 - { 927 - DTableDesc dtd = HUF_getDTableDesc(DTable); 928 - if (dtd.tableType != 0) return ERROR(GENERIC); 929 - return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 930 - } 931 - 932 - size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, 754 + static size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, 933 755 const void* cSrc, size_t cSrcSize, 934 - void* workSpace, size_t wkspSize) 756 + void* workSpace, size_t wkspSize, int flags) 935 757 { 936 758 const BYTE* ip = (const BYTE*) cSrc; 937 759 938 - size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); 760 + size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags); 939 761 if (HUF_isError(hSize)) return hSize; 940 762 if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 941 763 ip += hSize; cSrcSize -= hSize; 942 764 943 - return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); 765 + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags); 944 766 } 945 - 946 - 947 - size_t HUF_decompress4X1_usingDTable( 948 - void* dst, size_t dstSize, 949 - const void* cSrc, size_t cSrcSize, 950 - const HUF_DTable* DTable) 951 - { 952 - DTableDesc dtd = HUF_getDTableDesc(DTable); 953 - if (dtd.tableType != 
0) return ERROR(GENERIC); 954 - return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 955 - } 956 - 957 - static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, 958 - const void* cSrc, size_t cSrcSize, 959 - void* workSpace, size_t wkspSize, int bmi2) 960 - { 961 - const BYTE* ip = (const BYTE*) cSrc; 962 - 963 - size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 964 - if (HUF_isError(hSize)) return hSize; 965 - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 966 - ip += hSize; cSrcSize -= hSize; 967 - 968 - return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); 969 - } 970 - 971 - size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, 972 - const void* cSrc, size_t cSrcSize, 973 - void* workSpace, size_t wkspSize) 974 - { 975 - return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); 976 - } 977 - 978 767 979 768 #endif /* HUF_FORCE_DECOMPRESS_X2 */ 980 769 ··· 1120 985 1121 986 static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, 1122 987 const sortedSymbol_t* sortedList, 1123 - const U32* rankStart, rankValCol_t *rankValOrigin, const U32 maxWeight, 988 + const U32* rankStart, rankValCol_t* rankValOrigin, const U32 maxWeight, 1124 989 const U32 nbBitsBaseline) 1125 990 { 1126 991 U32* const rankVal = rankValOrigin[0]; ··· 1175 1040 1176 1041 size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, 1177 1042 const void* src, size_t srcSize, 1178 - void* workSpace, size_t wkspSize) 1179 - { 1180 - return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); 1181 - } 1182 - 1183 - size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, 1184 - const void* src, size_t srcSize, 1185 - void* workSpace, size_t wkspSize, int bmi2) 1043 + void* workSpace, size_t wkspSize, int flags) 1186 1044 { 1187 1045 
U32 tableLog, maxW, nbSymbols; 1188 1046 DTableDesc dtd = HUF_getDTableDesc(DTable); ··· 1197 1069 if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); 1198 1070 /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ 1199 1071 1200 - iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2); 1072 + iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), flags); 1201 1073 if (HUF_isError(iSize)) return iSize; 1202 1074 1203 1075 /* check result */ ··· 1287 1159 } 1288 1160 1289 1161 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ 1290 - ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) 1162 + do { ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); } while (0) 1291 1163 1292 - #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ 1293 - if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ 1294 - ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) 1164 + #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ 1165 + do { \ 1166 + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ 1167 + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \ 1168 + } while (0) 1295 1169 1296 - #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ 1297 - if (MEM_64bits()) \ 1298 - ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) 1170 + #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ 1171 + do { \ 1172 + if (MEM_64bits()) \ 1173 + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \ 1174 + } while (0) 1299 1175 1300 1176 HINT_INLINE size_t 1301 1177 HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, ··· 1359 1227 1360 1228 /* decode */ 1361 1229 { BYTE* const ostart = (BYTE*) dst; 1362 - BYTE* const oend = ostart + dstSize; 1230 + BYTE* const oend = 
ZSTD_maybeNullPtrAdd(ostart, dstSize); 1363 1231 const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ 1364 1232 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; 1365 1233 DTableDesc const dtd = HUF_getDTableDesc(DTable); ··· 1372 1240 /* decoded size */ 1373 1241 return dstSize; 1374 1242 } 1243 + 1244 + /* HUF_decompress4X2_usingDTable_internal_body(): 1245 + * Conditions: 1246 + * @dstSize >= 6 1247 + */ 1375 1248 FORCE_INLINE_TEMPLATE size_t 1376 1249 HUF_decompress4X2_usingDTable_internal_body( 1377 1250 void* dst, size_t dstSize, ··· 1384 1247 const HUF_DTable* DTable) 1385 1248 { 1386 1249 if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ 1250 + if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */ 1387 1251 1388 1252 { const BYTE* const istart = (const BYTE*) cSrc; 1389 1253 BYTE* const ostart = (BYTE*) dst; ··· 1418 1280 DTableDesc const dtd = HUF_getDTableDesc(DTable); 1419 1281 U32 const dtLog = dtd.tableLog; 1420 1282 1421 - if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ 1422 - if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ 1283 + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ 1284 + if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ 1285 + assert(dstSize >= 6 /* validated above */); 1423 1286 CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); 1424 1287 CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); 1425 1288 CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); ··· 1505 1366 } 1506 1367 #endif 1507 1368 1508 - #if HUF_NEED_DEFAULT_FUNCTION 1509 1369 static 1510 1370 size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc, 1511 1371 size_t cSrcSize, HUF_DTable const* DTable) { 1512 1372 return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); 1513 
1373 } 1514 - #endif 1515 1374 1516 1375 #if ZSTD_ENABLE_ASM_X86_64_BMI2 1517 1376 1518 - HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN; 1377 + HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN; 1519 1378 1520 - static HUF_ASM_X86_64_BMI2_ATTRS size_t 1521 - HUF_decompress4X2_usingDTable_internal_bmi2_asm( 1522 - void* dst, size_t dstSize, 1523 - const void* cSrc, size_t cSrcSize, 1524 - const HUF_DTable* DTable) { 1525 - void const* dt = DTable + 1; 1526 - const BYTE* const iend = (const BYTE*)cSrc + 6; 1527 - BYTE* const oend = (BYTE*)dst + dstSize; 1528 - HUF_DecompressAsmArgs args; 1529 - { 1530 - size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); 1531 - FORWARD_IF_ERROR(ret, "Failed to init asm args"); 1532 - if (ret != 0) 1533 - return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); 1379 + #endif 1380 + 1381 + static HUF_FAST_BMI2_ATTRS 1382 + void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args) 1383 + { 1384 + U64 bits[4]; 1385 + BYTE const* ip[4]; 1386 + BYTE* op[4]; 1387 + BYTE* oend[4]; 1388 + HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt; 1389 + BYTE const* const ilowest = args->ilowest; 1390 + 1391 + /* Copy the arguments to local registers. 
*/ 1392 + ZSTD_memcpy(&bits, &args->bits, sizeof(bits)); 1393 + ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip)); 1394 + ZSTD_memcpy(&op, &args->op, sizeof(op)); 1395 + 1396 + oend[0] = op[1]; 1397 + oend[1] = op[2]; 1398 + oend[2] = op[3]; 1399 + oend[3] = args->oend; 1400 + 1401 + assert(MEM_isLittleEndian()); 1402 + assert(!MEM_32bits()); 1403 + 1404 + for (;;) { 1405 + BYTE* olimit; 1406 + int stream; 1407 + 1408 + /* Assert loop preconditions */ 1409 + #ifndef NDEBUG 1410 + for (stream = 0; stream < 4; ++stream) { 1411 + assert(op[stream] <= oend[stream]); 1412 + assert(ip[stream] >= ilowest); 1413 + } 1414 + #endif 1415 + /* Compute olimit */ 1416 + { 1417 + /* Each loop does 5 table lookups for each of the 4 streams. 1418 + * Each table lookup consumes up to 11 bits of input, and produces 1419 + * up to 2 bytes of output. 1420 + */ 1421 + /* We can consume up to 7 bytes of input per iteration per stream. 1422 + * We also know that each input pointer is >= ip[0]. So we can run 1423 + * iters loops before running out of input. 1424 + */ 1425 + size_t iters = (size_t)(ip[0] - ilowest) / 7; 1426 + /* Each iteration can produce up to 10 bytes of output per stream. 1427 + * Each output stream my advance at different rates. So take the 1428 + * minimum number of safe iterations among all the output streams. 1429 + */ 1430 + for (stream = 0; stream < 4; ++stream) { 1431 + size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10; 1432 + iters = MIN(iters, oiters); 1433 + } 1434 + 1435 + /* Each iteration produces at least 5 output symbols. So until 1436 + * op[3] crosses olimit, we know we haven't executed iters 1437 + * iterations yet. This saves us maintaining an iters counter, 1438 + * at the expense of computing the remaining # of iterations 1439 + * more frequently. 1440 + */ 1441 + olimit = op[3] + (iters * 5); 1442 + 1443 + /* Exit the fast decoding loop once we reach the end. 
*/ 1444 + if (op[3] == olimit) 1445 + break; 1446 + 1447 + /* Exit the decoding loop if any input pointer has crossed the 1448 + * previous one. This indicates corruption, and a precondition 1449 + * to our loop is that ip[i] >= ip[0]. 1450 + */ 1451 + for (stream = 1; stream < 4; ++stream) { 1452 + if (ip[stream] < ip[stream - 1]) 1453 + goto _out; 1454 + } 1455 + } 1456 + 1457 + #ifndef NDEBUG 1458 + for (stream = 1; stream < 4; ++stream) { 1459 + assert(ip[stream] >= ip[stream - 1]); 1460 + } 1461 + #endif 1462 + 1463 + #define HUF_4X2_DECODE_SYMBOL(_stream, _decode3) \ 1464 + do { \ 1465 + if ((_decode3) || (_stream) != 3) { \ 1466 + int const index = (int)(bits[(_stream)] >> 53); \ 1467 + HUF_DEltX2 const entry = dtable[index]; \ 1468 + MEM_write16(op[(_stream)], entry.sequence); \ 1469 + bits[(_stream)] <<= (entry.nbBits) & 0x3F; \ 1470 + op[(_stream)] += (entry.length); \ 1471 + } \ 1472 + } while (0) 1473 + 1474 + #define HUF_4X2_RELOAD_STREAM(_stream) \ 1475 + do { \ 1476 + HUF_4X2_DECODE_SYMBOL(3, 1); \ 1477 + { \ 1478 + int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \ 1479 + int const nbBits = ctz & 7; \ 1480 + int const nbBytes = ctz >> 3; \ 1481 + ip[(_stream)] -= nbBytes; \ 1482 + bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \ 1483 + bits[(_stream)] <<= nbBits; \ 1484 + } \ 1485 + } while (0) 1486 + 1487 + /* Manually unroll the loop because compilers don't consistently 1488 + * unroll the inner loops, which destroys performance. 1489 + */ 1490 + do { 1491 + /* Decode 5 symbols from each of the first 3 streams. 1492 + * The final stream will be decoded during the reload phase 1493 + * to reduce register pressure. 
1494 + */ 1495 + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); 1496 + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); 1497 + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); 1498 + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); 1499 + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); 1500 + 1501 + /* Decode one symbol from the final stream */ 1502 + HUF_4X2_DECODE_SYMBOL(3, 1); 1503 + 1504 + /* Decode 4 symbols from the final stream & reload bitstreams. 1505 + * The final stream is reloaded last, meaning that all 5 symbols 1506 + * are decoded from the final stream before it is reloaded. 1507 + */ 1508 + HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM); 1509 + } while (op[3] < olimit); 1534 1510 } 1535 1511 1536 - assert(args.ip[0] >= args.ilimit); 1537 - HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args); 1512 + #undef HUF_4X2_DECODE_SYMBOL 1513 + #undef HUF_4X2_RELOAD_STREAM 1514 + 1515 + _out: 1516 + 1517 + /* Save the final values of each of the state variables back to args. 
*/ 1518 + ZSTD_memcpy(&args->bits, &bits, sizeof(bits)); 1519 + ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip)); 1520 + ZSTD_memcpy(&args->op, &op, sizeof(op)); 1521 + } 1522 + 1523 + 1524 + static HUF_FAST_BMI2_ATTRS size_t 1525 + HUF_decompress4X2_usingDTable_internal_fast( 1526 + void* dst, size_t dstSize, 1527 + const void* cSrc, size_t cSrcSize, 1528 + const HUF_DTable* DTable, 1529 + HUF_DecompressFastLoopFn loopFn) { 1530 + void const* dt = DTable + 1; 1531 + const BYTE* const ilowest = (const BYTE*)cSrc; 1532 + BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize); 1533 + HUF_DecompressFastArgs args; 1534 + { 1535 + size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); 1536 + FORWARD_IF_ERROR(ret, "Failed to init asm args"); 1537 + if (ret == 0) 1538 + return 0; 1539 + } 1540 + 1541 + assert(args.ip[0] >= args.ilowest); 1542 + loopFn(&args); 1538 1543 1539 1544 /* note : op4 already verified within main loop */ 1540 - assert(args.ip[0] >= iend); 1541 - assert(args.ip[1] >= iend); 1542 - assert(args.ip[2] >= iend); 1543 - assert(args.ip[3] >= iend); 1545 + assert(args.ip[0] >= ilowest); 1546 + assert(args.ip[1] >= ilowest); 1547 + assert(args.ip[2] >= ilowest); 1548 + assert(args.ip[3] >= ilowest); 1544 1549 assert(args.op[3] <= oend); 1545 - (void)iend; 1550 + 1551 + assert(ilowest == args.ilowest); 1552 + assert(ilowest + 6 == args.iend[0]); 1553 + (void)ilowest; 1546 1554 1547 1555 /* finish bitStreams one by one */ 1548 1556 { ··· 1712 1426 /* decoded size */ 1713 1427 return dstSize; 1714 1428 } 1715 - #endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */ 1716 1429 1717 1430 static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc, 1718 - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) 1431 + size_t cSrcSize, HUF_DTable const* DTable, int flags) 1719 1432 { 1433 + HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X2_usingDTable_internal_default; 1434 + 
HUF_DecompressFastLoopFn loopFn = HUF_decompress4X2_usingDTable_internal_fast_c_loop; 1435 + 1720 1436 #if DYNAMIC_BMI2 1721 - if (bmi2) { 1437 + if (flags & HUF_flags_bmi2) { 1438 + fallbackFn = HUF_decompress4X2_usingDTable_internal_bmi2; 1722 1439 # if ZSTD_ENABLE_ASM_X86_64_BMI2 1723 - return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); 1724 - # else 1725 - return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); 1440 + if (!(flags & HUF_flags_disableAsm)) { 1441 + loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop; 1442 + } 1726 1443 # endif 1444 + } else { 1445 + return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable); 1727 1446 } 1728 - #else 1729 - (void)bmi2; 1730 1447 #endif 1731 1448 1732 1449 #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__) 1733 - return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable); 1734 - #else 1735 - return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable); 1450 + if (!(flags & HUF_flags_disableAsm)) { 1451 + loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop; 1452 + } 1736 1453 #endif 1454 + 1455 + if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) { 1456 + size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn); 1457 + if (ret != 0) 1458 + return ret; 1459 + } 1460 + return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable); 1737 1461 } 1738 1462 1739 1463 HUF_DGEN(HUF_decompress1X2_usingDTable_internal) 1740 1464 1741 - size_t HUF_decompress1X2_usingDTable( 1742 - void* dst, size_t dstSize, 1743 - const void* cSrc, size_t cSrcSize, 1744 - const HUF_DTable* DTable) 1745 - { 1746 - DTableDesc dtd = HUF_getDTableDesc(DTable); 1747 - if (dtd.tableType != 1) return ERROR(GENERIC); 1748 - return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1749 - } 1750 - 
1751 1465 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, 1752 1466 const void* cSrc, size_t cSrcSize, 1753 - void* workSpace, size_t wkspSize) 1467 + void* workSpace, size_t wkspSize, int flags) 1754 1468 { 1755 1469 const BYTE* ip = (const BYTE*) cSrc; 1756 1470 1757 1471 size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, 1758 - workSpace, wkspSize); 1472 + workSpace, wkspSize, flags); 1759 1473 if (HUF_isError(hSize)) return hSize; 1760 1474 if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 1761 1475 ip += hSize; cSrcSize -= hSize; 1762 1476 1763 - return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); 1477 + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, flags); 1764 1478 } 1765 1479 1766 - 1767 - size_t HUF_decompress4X2_usingDTable( 1768 - void* dst, size_t dstSize, 1769 - const void* cSrc, size_t cSrcSize, 1770 - const HUF_DTable* DTable) 1771 - { 1772 - DTableDesc dtd = HUF_getDTableDesc(DTable); 1773 - if (dtd.tableType != 1) return ERROR(GENERIC); 1774 - return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1775 - } 1776 - 1777 - static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, 1480 + static size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, 1778 1481 const void* cSrc, size_t cSrcSize, 1779 - void* workSpace, size_t wkspSize, int bmi2) 1482 + void* workSpace, size_t wkspSize, int flags) 1780 1483 { 1781 1484 const BYTE* ip = (const BYTE*) cSrc; 1782 1485 1783 1486 size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, 1784 - workSpace, wkspSize); 1487 + workSpace, wkspSize, flags); 1785 1488 if (HUF_isError(hSize)) return hSize; 1786 1489 if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 1787 1490 ip += hSize; cSrcSize -= hSize; 1788 1491 1789 - return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, 
cSrcSize, dctx, bmi2); 1492 + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags); 1790 1493 } 1791 - 1792 - size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, 1793 - const void* cSrc, size_t cSrcSize, 1794 - void* workSpace, size_t wkspSize) 1795 - { 1796 - return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); 1797 - } 1798 - 1799 1494 1800 1495 #endif /* HUF_FORCE_DECOMPRESS_X1 */ 1801 1496 ··· 1784 1517 /* ***********************************/ 1785 1518 /* Universal decompression selectors */ 1786 1519 /* ***********************************/ 1787 - 1788 - size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, 1789 - const void* cSrc, size_t cSrcSize, 1790 - const HUF_DTable* DTable) 1791 - { 1792 - DTableDesc const dtd = HUF_getDTableDesc(DTable); 1793 - #if defined(HUF_FORCE_DECOMPRESS_X1) 1794 - (void)dtd; 1795 - assert(dtd.tableType == 0); 1796 - return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1797 - #elif defined(HUF_FORCE_DECOMPRESS_X2) 1798 - (void)dtd; 1799 - assert(dtd.tableType == 1); 1800 - return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1801 - #else 1802 - return dtd.tableType ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : 1803 - HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1804 - #endif 1805 - } 1806 - 1807 - size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, 1808 - const void* cSrc, size_t cSrcSize, 1809 - const HUF_DTable* DTable) 1810 - { 1811 - DTableDesc const dtd = HUF_getDTableDesc(DTable); 1812 - #if defined(HUF_FORCE_DECOMPRESS_X1) 1813 - (void)dtd; 1814 - assert(dtd.tableType == 0); 1815 - return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1816 - #elif defined(HUF_FORCE_DECOMPRESS_X2) 1817 - (void)dtd; 1818 - assert(dtd.tableType == 1); 1819 - return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1820 - #else 1821 - return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : 1822 - HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); 1823 - #endif 1824 - } 1825 1520 1826 1521 1827 1522 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) ··· 1839 1610 #endif 1840 1611 } 1841 1612 1842 - 1843 - size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, 1844 - size_t dstSize, const void* cSrc, 1845 - size_t cSrcSize, void* workSpace, 1846 - size_t wkspSize) 1847 - { 1848 - /* validation checks */ 1849 - if (dstSize == 0) return ERROR(dstSize_tooSmall); 1850 - if (cSrcSize == 0) return ERROR(corruption_detected); 1851 - 1852 - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); 1853 - #if defined(HUF_FORCE_DECOMPRESS_X1) 1854 - (void)algoNb; 1855 - assert(algoNb == 0); 1856 - return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); 1857 - #elif defined(HUF_FORCE_DECOMPRESS_X2) 1858 - (void)algoNb; 1859 - assert(algoNb == 1); 1860 - return 
HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); 1861 - #else 1862 - return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, 1863 - cSrcSize, workSpace, wkspSize): 1864 - HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); 1865 - #endif 1866 - } 1867 - } 1868 - 1869 1613 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, 1870 1614 const void* cSrc, size_t cSrcSize, 1871 - void* workSpace, size_t wkspSize) 1615 + void* workSpace, size_t wkspSize, int flags) 1872 1616 { 1873 1617 /* validation checks */ 1874 1618 if (dstSize == 0) return ERROR(dstSize_tooSmall); ··· 1854 1652 (void)algoNb; 1855 1653 assert(algoNb == 0); 1856 1654 return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, 1857 - cSrcSize, workSpace, wkspSize); 1655 + cSrcSize, workSpace, wkspSize, flags); 1858 1656 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1859 1657 (void)algoNb; 1860 1658 assert(algoNb == 1); 1861 1659 return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, 1862 - cSrcSize, workSpace, wkspSize); 1660 + cSrcSize, workSpace, wkspSize, flags); 1863 1661 #else 1864 1662 return algoNb ? 
HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, 1865 - cSrcSize, workSpace, wkspSize): 1663 + cSrcSize, workSpace, wkspSize, flags): 1866 1664 HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, 1867 - cSrcSize, workSpace, wkspSize); 1665 + cSrcSize, workSpace, wkspSize, flags); 1868 1666 #endif 1869 1667 } 1870 1668 } 1871 1669 1872 1670 1873 - size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) 1671 + size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags) 1874 1672 { 1875 1673 DTableDesc const dtd = HUF_getDTableDesc(DTable); 1876 1674 #if defined(HUF_FORCE_DECOMPRESS_X1) 1877 1675 (void)dtd; 1878 1676 assert(dtd.tableType == 0); 1879 - return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1677 + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); 1880 1678 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1881 1679 (void)dtd; 1882 1680 assert(dtd.tableType == 1); 1883 - return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1681 + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); 1884 1682 #else 1885 - return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : 1886 - HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1683 + return dtd.tableType ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) : 1684 + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); 1887 1685 #endif 1888 1686 } 1889 1687 1890 1688 #ifndef HUF_FORCE_DECOMPRESS_X2 1891 - size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) 1689 + size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags) 1892 1690 { 1893 1691 const BYTE* ip = (const BYTE*) cSrc; 1894 1692 1895 - size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 1693 + size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags); 1896 1694 if (HUF_isError(hSize)) return hSize; 1897 1695 if (hSize >= cSrcSize) return ERROR(srcSize_wrong); 1898 1696 ip += hSize; cSrcSize -= hSize; 1899 1697 1900 - return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); 1698 + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags); 1901 1699 } 1902 1700 #endif 1903 1701 1904 - size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) 1702 + size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags) 1905 1703 { 1906 1704 DTableDesc const dtd = HUF_getDTableDesc(DTable); 1907 1705 #if defined(HUF_FORCE_DECOMPRESS_X1) 1908 1706 (void)dtd; 1909 1707 assert(dtd.tableType == 0); 1910 - return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1708 + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); 1911 1709 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1912 
1710 (void)dtd; 1913 1711 assert(dtd.tableType == 1); 1914 - return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1712 + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); 1915 1713 #else 1916 - return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : 1917 - HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); 1714 + return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) : 1715 + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); 1918 1716 #endif 1919 1717 } 1920 1718 1921 - size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) 1719 + size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags) 1922 1720 { 1923 1721 /* validation checks */ 1924 1722 if (dstSize == 0) return ERROR(dstSize_tooSmall); ··· 1928 1726 #if defined(HUF_FORCE_DECOMPRESS_X1) 1929 1727 (void)algoNb; 1930 1728 assert(algoNb == 0); 1931 - return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 1729 + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags); 1932 1730 #elif defined(HUF_FORCE_DECOMPRESS_X2) 1933 1731 (void)algoNb; 1934 1732 assert(algoNb == 1); 1935 - return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 1733 + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags); 1936 1734 #else 1937 - return algoNb ? 
HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : 1938 - HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); 1735 + return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags) : 1736 + HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags); 1939 1737 #endif 1940 1738 } 1941 1739 } 1942 -
+5 -4
lib/zstd/decompress/zstd_ddict.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 15 14 /*-******************************************************* 16 15 * Dependencies 17 16 *********************************************************/ 17 + #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */ 18 18 #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ 19 19 #include "../common/cpu.h" /* bmi2 */ 20 20 #include "../common/mem.h" /* low level memory routines */ 21 21 #define FSE_STATIC_LINKING_ONLY 22 22 #include "../common/fse.h" 23 - #define HUF_STATIC_LINKING_ONLY 24 23 #include "../common/huf.h" 25 24 #include "zstd_decompress_internal.h" 26 25 #include "zstd_ddict.h" ··· 132 131 ZSTD_memcpy(internalBuffer, dict, dictSize); 133 132 } 134 133 ddict->dictSize = dictSize; 135 - ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ 134 + ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */ 136 135 137 136 /* parse dictionary content */ 138 137 FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); ··· 238 237 unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) 239 238 { 240 239 if (ddict==NULL) return 0; 241 - return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); 240 + return ddict->dictID; 242 241 }
+2 -1
lib/zstd/decompress/zstd_ddict.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the
+263 -110
lib/zstd/decompress/zstd_decompress.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 54 53 * Dependencies 55 54 *********************************************************/ 56 55 #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ 56 + #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */ 57 + #include "../common/error_private.h" 58 + #include "../common/zstd_internal.h" /* blockProperties_t */ 57 59 #include "../common/mem.h" /* low level memory routines */ 60 + #include "../common/bits.h" /* ZSTD_highbit32 */ 58 61 #define FSE_STATIC_LINKING_ONLY 59 62 #include "../common/fse.h" 60 - #define HUF_STATIC_LINKING_ONLY 61 63 #include "../common/huf.h" 62 64 #include <linux/xxhash.h> /* xxh64_reset, xxh64_update, xxh64_digest, XXH64 */ 63 - #include "../common/zstd_internal.h" /* blockProperties_t */ 64 65 #include "zstd_decompress_internal.h" /* ZSTD_DCtx */ 65 66 #include "zstd_ddict.h" /* ZSTD_DDictDictContent */ 66 67 #include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */ ··· 75 72 *************************************/ 76 73 77 74 #define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4 78 - #define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float. 79 - * Currently, that means a 0.75 load factor. 80 - * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded 81 - * the load factor of the ddict hash set. 82 - */ 75 + #define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float. 76 + * Currently, that means a 0.75 load factor. 
77 + * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded 78 + * the load factor of the ddict hash set. 79 + */ 83 80 84 81 #define DDICT_HASHSET_TABLE_BASE_SIZE 64 85 82 #define DDICT_HASHSET_RESIZE_FACTOR 2 ··· 240 237 dctx->outBufferMode = ZSTD_bm_buffered; 241 238 dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; 242 239 dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict; 240 + dctx->disableHufAsm = 0; 241 + dctx->maxBlockSizeParam = 0; 243 242 } 244 243 245 244 static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) ··· 258 253 dctx->streamStage = zdss_init; 259 254 dctx->noForwardProgress = 0; 260 255 dctx->oversizedDuration = 0; 256 + dctx->isFrameDecompression = 1; 261 257 #if DYNAMIC_BMI2 262 258 dctx->bmi2 = ZSTD_cpuSupportsBmi2(); 263 259 #endif ··· 427 421 * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless 428 422 * @return : 0, `zfhPtr` is correctly filled, 429 423 * >0, `srcSize` is too small, value is wanted `srcSize` amount, 430 - * or an error code, which can be tested using ZSTD_isError() */ 431 - size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) 424 + ** or an error code, which can be tested using ZSTD_isError() */ 425 + size_t ZSTD_getFrameHeader_advanced(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) 432 426 { 433 427 const BYTE* ip = (const BYTE*)src; 434 428 size_t const minInputSize = ZSTD_startingInputLength(format); 435 429 436 - ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ 437 - if (srcSize < minInputSize) return minInputSize; 438 - RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter"); 430 + DEBUGLOG(5, "ZSTD_getFrameHeader_advanced: minInputSize = %zu, srcSize = %zu", minInputSize, srcSize); 439 431 432 + if (srcSize > 0) { 433 + /* 
note : technically could be considered an assert(), since it's an invalid entry */ 434 + RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter : src==NULL, but srcSize>0"); 435 + } 436 + if (srcSize < minInputSize) { 437 + if (srcSize > 0 && format != ZSTD_f_zstd1_magicless) { 438 + /* when receiving less than @minInputSize bytes, 439 + * control these bytes at least correspond to a supported magic number 440 + * in order to error out early if they don't. 441 + **/ 442 + size_t const toCopy = MIN(4, srcSize); 443 + unsigned char hbuf[4]; MEM_writeLE32(hbuf, ZSTD_MAGICNUMBER); 444 + assert(src != NULL); 445 + ZSTD_memcpy(hbuf, src, toCopy); 446 + if ( MEM_readLE32(hbuf) != ZSTD_MAGICNUMBER ) { 447 + /* not a zstd frame : let's check if it's a skippable frame */ 448 + MEM_writeLE32(hbuf, ZSTD_MAGIC_SKIPPABLE_START); 449 + ZSTD_memcpy(hbuf, src, toCopy); 450 + if ((MEM_readLE32(hbuf) & ZSTD_MAGIC_SKIPPABLE_MASK) != ZSTD_MAGIC_SKIPPABLE_START) { 451 + RETURN_ERROR(prefix_unknown, 452 + "first bytes don't correspond to any supported magic number"); 453 + } } } 454 + return minInputSize; 455 + } 456 + 457 + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzers may not understand that zfhPtr will be read only if return value is zero, since they are 2 different signals */ 440 458 if ( (format != ZSTD_f_zstd1_magicless) 441 459 && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { 442 460 if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { ··· 468 438 if (srcSize < ZSTD_SKIPPABLEHEADERSIZE) 469 439 return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */ 470 440 ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); 471 - zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE); 472 441 zfhPtr->frameType = ZSTD_skippableFrame; 442 + zfhPtr->dictID = MEM_readLE32(src) - ZSTD_MAGIC_SKIPPABLE_START; 443 + zfhPtr->headerSize = ZSTD_SKIPPABLEHEADERSIZE; 444 + zfhPtr->frameContentSize = 
MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE); 473 445 return 0; 474 446 } 475 447 RETURN_ERROR(prefix_unknown, ""); ··· 540 508 * @return : 0, `zfhPtr` is correctly filled, 541 509 * >0, `srcSize` is too small, value is wanted `srcSize` amount, 542 510 * or an error code, which can be tested using ZSTD_isError() */ 543 - size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize) 511 + size_t ZSTD_getFrameHeader(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize) 544 512 { 545 513 return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1); 546 514 } ··· 552 520 * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */ 553 521 unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize) 554 522 { 555 - { ZSTD_frameHeader zfh; 523 + { ZSTD_FrameHeader zfh; 556 524 if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0) 557 525 return ZSTD_CONTENTSIZE_ERROR; 558 526 if (zfh.frameType == ZSTD_skippableFrame) { ··· 572 540 sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); 573 541 RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, 574 542 frameParameter_unsupported, ""); 575 - { 576 - size_t const skippableSize = skippableHeaderSize + sizeU32; 543 + { size_t const skippableSize = skippableHeaderSize + sizeU32; 577 544 RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, ""); 578 545 return skippableSize; 579 546 } 580 547 } 581 548 582 549 /*! ZSTD_readSkippableFrame() : 583 - * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer. 550 + * Retrieves content of a skippable frame, and writes it to dst buffer. 584 551 * 585 552 * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written, 586 553 * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested 587 554 * in the magicVariant. 
588 555 * 589 - * Returns an error if destination buffer is not large enough, or if the frame is not skippable. 556 + * Returns an error if destination buffer is not large enough, or if this is not a valid skippable frame. 590 557 * 591 558 * @return : number of bytes written or a ZSTD error. 592 559 */ 593 - ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant, 594 - const void* src, size_t srcSize) 560 + size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, 561 + unsigned* magicVariant, /* optional, can be NULL */ 562 + const void* src, size_t srcSize) 595 563 { 596 - U32 const magicNumber = MEM_readLE32(src); 597 - size_t skippableFrameSize = readSkippableFrameSize(src, srcSize); 598 - size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE; 564 + RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, ""); 599 565 600 - /* check input validity */ 601 - RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, ""); 602 - RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, ""); 603 - RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, ""); 566 + { U32 const magicNumber = MEM_readLE32(src); 567 + size_t skippableFrameSize = readSkippableFrameSize(src, srcSize); 568 + size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE; 604 569 605 - /* deliver payload */ 606 - if (skippableContentSize > 0 && dst != NULL) 607 - ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize); 608 - if (magicVariant != NULL) 609 - *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START; 610 - return skippableContentSize; 570 + /* check input validity */ 571 + RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, ""); 572 + RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, 
""); 573 + RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, ""); 574 + 575 + /* deliver payload */ 576 + if (skippableContentSize > 0 && dst != NULL) 577 + ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize); 578 + if (magicVariant != NULL) 579 + *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START; 580 + return skippableContentSize; 581 + } 611 582 } 612 583 613 584 /* ZSTD_findDecompressedSize() : 614 - * compatible with legacy mode 615 585 * `srcSize` must be the exact length of some number of ZSTD compressed and/or 616 586 * skippable frames 617 - * @return : decompressed size of the frames contained */ 587 + * note: compatible with legacy mode 588 + * @return : decompressed size of the frames contained */ 618 589 unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) 619 590 { 620 591 unsigned long long totalDstSize = 0; ··· 627 592 628 593 if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { 629 594 size_t const skippableSize = readSkippableFrameSize(src, srcSize); 630 - if (ZSTD_isError(skippableSize)) { 631 - return ZSTD_CONTENTSIZE_ERROR; 632 - } 595 + if (ZSTD_isError(skippableSize)) return ZSTD_CONTENTSIZE_ERROR; 633 596 assert(skippableSize <= srcSize); 634 597 635 598 src = (const BYTE *)src + skippableSize; ··· 635 602 continue; 636 603 } 637 604 638 - { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); 639 - if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret; 605 + { unsigned long long const fcs = ZSTD_getFrameContentSize(src, srcSize); 606 + if (fcs >= ZSTD_CONTENTSIZE_ERROR) return fcs; 640 607 641 - /* check for overflow */ 642 - if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR; 643 - totalDstSize += ret; 608 + if (totalDstSize + fcs < totalDstSize) 609 + return ZSTD_CONTENTSIZE_ERROR; /* check for overflow */ 610 + totalDstSize += fcs; 644 611 } 612 + /* skip to next frame */ 645 613 { size_t const 
frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); 646 - if (ZSTD_isError(frameSrcSize)) { 647 - return ZSTD_CONTENTSIZE_ERROR; 648 - } 614 + if (ZSTD_isError(frameSrcSize)) return ZSTD_CONTENTSIZE_ERROR; 615 + assert(frameSrcSize <= srcSize); 649 616 650 617 src = (const BYTE *)src + frameSrcSize; 651 618 srcSize -= frameSrcSize; ··· 709 676 return frameSizeInfo; 710 677 } 711 678 712 - static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) 679 + static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize, ZSTD_format_e format) 713 680 { 714 681 ZSTD_frameSizeInfo frameSizeInfo; 715 682 ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); 716 683 717 684 718 - if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) 685 + if (format == ZSTD_f_zstd1 && (srcSize >= ZSTD_SKIPPABLEHEADERSIZE) 719 686 && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { 720 687 frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); 721 688 assert(ZSTD_isError(frameSizeInfo.compressedSize) || ··· 726 693 const BYTE* const ipstart = ip; 727 694 size_t remainingSize = srcSize; 728 695 size_t nbBlocks = 0; 729 - ZSTD_frameHeader zfh; 696 + ZSTD_FrameHeader zfh; 730 697 731 698 /* Extract Frame Header */ 732 - { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); 699 + { size_t const ret = ZSTD_getFrameHeader_advanced(&zfh, src, srcSize, format); 733 700 if (ZSTD_isError(ret)) 734 701 return ZSTD_errorFrameSizeInfo(ret); 735 702 if (ret > 0) ··· 763 730 ip += 4; 764 731 } 765 732 733 + frameSizeInfo.nbBlocks = nbBlocks; 766 734 frameSizeInfo.compressedSize = (size_t)(ip - ipstart); 767 735 frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) 768 736 ? 
zfh.frameContentSize 769 - : nbBlocks * zfh.blockSizeMax; 737 + : (unsigned long long)nbBlocks * zfh.blockSizeMax; 770 738 return frameSizeInfo; 771 739 } 772 740 } 773 741 742 + static size_t ZSTD_findFrameCompressedSize_advanced(const void *src, size_t srcSize, ZSTD_format_e format) { 743 + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, format); 744 + return frameSizeInfo.compressedSize; 745 + } 746 + 774 747 /* ZSTD_findFrameCompressedSize() : 775 - * compatible with legacy mode 776 - * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame 777 - * `srcSize` must be at least as large as the frame contained 778 - * @return : the compressed size of the frame starting at `src` */ 748 + * See docs in zstd.h 749 + * Note: compatible with legacy mode */ 779 750 size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) 780 751 { 781 - ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); 782 - return frameSizeInfo.compressedSize; 752 + return ZSTD_findFrameCompressedSize_advanced(src, srcSize, ZSTD_f_zstd1); 783 753 } 784 754 785 755 /* ZSTD_decompressBound() : 786 756 * compatible with legacy mode 787 - * `src` must point to the start of a ZSTD frame or a skippeable frame 757 + * `src` must point to the start of a ZSTD frame or a skippable frame 788 758 * `srcSize` must be at least as large as the frame contained 789 759 * @return : the maximum decompressed size of the compressed source 790 760 */ ··· 796 760 unsigned long long bound = 0; 797 761 /* Iterate over each frame */ 798 762 while (srcSize > 0) { 799 - ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); 763 + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1); 800 764 size_t const compressedSize = frameSizeInfo.compressedSize; 801 765 unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; 802 766 if 
(ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) ··· 809 773 return bound; 810 774 } 811 775 776 + size_t ZSTD_decompressionMargin(void const* src, size_t srcSize) 777 + { 778 + size_t margin = 0; 779 + unsigned maxBlockSize = 0; 780 + 781 + /* Iterate over each frame */ 782 + while (srcSize > 0) { 783 + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1); 784 + size_t const compressedSize = frameSizeInfo.compressedSize; 785 + unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; 786 + ZSTD_FrameHeader zfh; 787 + 788 + FORWARD_IF_ERROR(ZSTD_getFrameHeader(&zfh, src, srcSize), ""); 789 + if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) 790 + return ERROR(corruption_detected); 791 + 792 + if (zfh.frameType == ZSTD_frame) { 793 + /* Add the frame header to our margin */ 794 + margin += zfh.headerSize; 795 + /* Add the checksum to our margin */ 796 + margin += zfh.checksumFlag ? 4 : 0; 797 + /* Add 3 bytes per block */ 798 + margin += 3 * frameSizeInfo.nbBlocks; 799 + 800 + /* Compute the max block size */ 801 + maxBlockSize = MAX(maxBlockSize, zfh.blockSizeMax); 802 + } else { 803 + assert(zfh.frameType == ZSTD_skippableFrame); 804 + /* Add the entire skippable frame size to our margin. */ 805 + margin += compressedSize; 806 + } 807 + 808 + assert(srcSize >= compressedSize); 809 + src = (const BYTE*)src + compressedSize; 810 + srcSize -= compressedSize; 811 + } 812 + 813 + /* Add the max block size back to the margin. 
*/ 814 + margin += maxBlockSize; 815 + 816 + return margin; 817 + } 812 818 813 819 /*-************************************************************* 814 820 * Frame decoding ··· 893 815 return regenSize; 894 816 } 895 817 896 - static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming) 818 + static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, int streaming) 897 819 { 898 820 (void)dctx; 899 821 (void)uncompressedSize; ··· 934 856 ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; 935 857 } 936 858 859 + /* Shrink the blockSizeMax if enabled */ 860 + if (dctx->maxBlockSizeParam != 0) 861 + dctx->fParams.blockSizeMax = MIN(dctx->fParams.blockSizeMax, (unsigned)dctx->maxBlockSizeParam); 862 + 937 863 /* Loop on each block */ 938 864 while (1) { 939 865 BYTE* oBlockEnd = oend; ··· 970 888 switch(blockProperties.blockType) 971 889 { 972 890 case bt_compressed: 973 - decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, /* frame */ 1, not_streaming); 891 + assert(dctx->isFrameDecompression == 1); 892 + decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, not_streaming); 974 893 break; 975 894 case bt_raw : 976 895 /* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. 
*/ ··· 984 901 default: 985 902 RETURN_ERROR(corruption_detected, "invalid block type"); 986 903 } 987 - 988 - if (ZSTD_isError(decodedSize)) return decodedSize; 989 - if (dctx->validateChecksum) 904 + FORWARD_IF_ERROR(decodedSize, "Block decompression failure"); 905 + DEBUGLOG(5, "Decompressed block of dSize = %u", (unsigned)decodedSize); 906 + if (dctx->validateChecksum) { 990 907 xxh64_update(&dctx->xxhState, op, decodedSize); 991 - if (decodedSize != 0) 908 + } 909 + if (decodedSize) /* support dst = NULL,0 */ { 992 910 op += decodedSize; 911 + } 993 912 assert(ip != NULL); 994 913 ip += cBlockSize; 995 914 remainingSrcSize -= cBlockSize; ··· 1015 930 } 1016 931 ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0); 1017 932 /* Allow caller to get size read */ 933 + DEBUGLOG(4, "ZSTD_decompressFrame: decompressed frame of size %i, consuming %i bytes of input", (int)(op-ostart), (int)(ip - (const BYTE*)*srcPtr)); 1018 934 *srcPtr = ip; 1019 935 *srcSizePtr = remainingSrcSize; 1020 936 return (size_t)(op-ostart); 1021 937 } 1022 938 1023 - static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, 939 + static 940 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 941 + size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, 1024 942 void* dst, size_t dstCapacity, 1025 943 const void* src, size_t srcSize, 1026 944 const void* dict, size_t dictSize, ··· 1043 955 while (srcSize >= ZSTD_startingInputLength(dctx->format)) { 1044 956 1045 957 1046 - { U32 const magicNumber = MEM_readLE32(src); 1047 - DEBUGLOG(4, "reading magic number %08X (expecting %08X)", 1048 - (unsigned)magicNumber, ZSTD_MAGICNUMBER); 958 + if (dctx->format == ZSTD_f_zstd1 && srcSize >= 4) { 959 + U32 const magicNumber = MEM_readLE32(src); 960 + DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber); 1049 961 if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { 962 + /* skippable frame detected : skip it */ 1050 963 size_t const skippableSize = 
readSkippableFrameSize(src, srcSize); 1051 - FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed"); 964 + FORWARD_IF_ERROR(skippableSize, "invalid skippable frame"); 1052 965 assert(skippableSize <= srcSize); 1053 966 1054 967 src = (const BYTE *)src + skippableSize; 1055 968 srcSize -= skippableSize; 1056 - continue; 969 + continue; /* check next frame */ 1057 970 } } 1058 971 1059 972 if (ddict) { ··· 1150 1061 size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } 1151 1062 1152 1063 /* 1153 - * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed, 1154 - * we allow taking a partial block as the input. Currently only raw uncompressed blocks can 1064 + * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed, we 1065 + * allow taking a partial block as the input. Currently only raw uncompressed blocks can 1155 1066 * be streamed. 1156 1067 * 1157 1068 * For blocks that can be streamed, this allows us to reduce the latency until we produce ··· 1270 1181 { 1271 1182 case bt_compressed: 1272 1183 DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); 1273 - rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming); 1184 + assert(dctx->isFrameDecompression == 1); 1185 + rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, is_streaming); 1274 1186 dctx->expected = 0; /* Streaming not supported */ 1275 1187 break; 1276 1188 case bt_raw : ··· 1340 1250 case ZSTDds_decodeSkippableHeader: 1341 1251 assert(src != NULL); 1342 1252 assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); 1253 + assert(dctx->format != ZSTD_f_zstd1_magicless); 1343 1254 ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ 1344 1255 dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ 
1345 1256 dctx->stage = ZSTDds_skipFrame; ··· 1353 1262 1354 1263 default: 1355 1264 assert(0); /* impossible */ 1356 - RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ 1265 + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compilers require default to do something */ 1357 1266 } 1358 1267 } 1359 1268 ··· 1394 1303 /* in minimal huffman, we always use X1 variants */ 1395 1304 size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable, 1396 1305 dictPtr, dictEnd - dictPtr, 1397 - workspace, workspaceSize); 1306 + workspace, workspaceSize, /* flags */ 0); 1398 1307 #else 1399 1308 size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, 1400 1309 dictPtr, (size_t)(dictEnd - dictPtr), 1401 - workspace, workspaceSize); 1310 + workspace, workspaceSize, /* flags */ 0); 1402 1311 #endif 1403 1312 RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); 1404 1313 dictPtr += hSize; ··· 1494 1403 dctx->prefixStart = NULL; 1495 1404 dctx->virtualStart = NULL; 1496 1405 dctx->dictEnd = NULL; 1497 - dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ 1406 + dctx->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */ 1498 1407 dctx->litEntropy = dctx->fseEntropy = 0; 1499 1408 dctx->dictID = 0; 1500 1409 dctx->bType = bt_reserved; 1410 + dctx->isFrameDecompression = 1; 1501 1411 ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); 1502 1412 ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ 1503 1413 dctx->LLTptr = dctx->entropy.LLTable; ··· 1557 1465 * This could for one of the following reasons : 1558 1466 * - The frame does not require a dictionary (most common case). 1559 1467 * - The frame was built with dictID intentionally removed. 1560 - * Needed dictionary is a hidden information. 
1468 + * Needed dictionary is a hidden piece of information. 1561 1469 * Note : this use case also happens when using a non-conformant dictionary. 1562 1470 * - `srcSize` is too small, and as a result, frame header could not be decoded. 1563 1471 * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`. ··· 1566 1474 * ZSTD_getFrameHeader(), which will provide a more precise error code. */ 1567 1475 unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) 1568 1476 { 1569 - ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 }; 1477 + ZSTD_FrameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0, 0, 0 }; 1570 1478 size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize); 1571 1479 if (ZSTD_isError(hError)) return 0; 1572 1480 return zfp.dictID; ··· 1673 1581 size_t ZSTD_initDStream(ZSTD_DStream* zds) 1674 1582 { 1675 1583 DEBUGLOG(4, "ZSTD_initDStream"); 1676 - return ZSTD_initDStream_usingDDict(zds, NULL); 1584 + FORWARD_IF_ERROR(ZSTD_DCtx_reset(zds, ZSTD_reset_session_only), ""); 1585 + FORWARD_IF_ERROR(ZSTD_DCtx_refDDict(zds, NULL), ""); 1586 + return ZSTD_startingInputLength(zds->format); 1677 1587 } 1678 1588 1679 1589 /* ZSTD_initDStream_usingDDict() : ··· 1683 1589 * this function cannot fail */ 1684 1590 size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) 1685 1591 { 1592 + DEBUGLOG(4, "ZSTD_initDStream_usingDDict"); 1686 1593 FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , ""); 1687 1594 FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , ""); 1688 1595 return ZSTD_startingInputLength(dctx->format); ··· 1694 1599 * this function cannot fail */ 1695 1600 size_t ZSTD_resetDStream(ZSTD_DStream* dctx) 1696 1601 { 1602 + DEBUGLOG(4, "ZSTD_resetDStream"); 1697 1603 FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); 1698 1604 return ZSTD_startingInputLength(dctx->format); 1699 1605 } ··· 1766 1670 bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict; 1767 1671 bounds.upperBound = 
(int)ZSTD_rmd_refMultipleDDicts; 1768 1672 return bounds; 1673 + case ZSTD_d_disableHuffmanAssembly: 1674 + bounds.lowerBound = 0; 1675 + bounds.upperBound = 1; 1676 + return bounds; 1677 + case ZSTD_d_maxBlockSize: 1678 + bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN; 1679 + bounds.upperBound = ZSTD_BLOCKSIZE_MAX; 1680 + return bounds; 1681 + 1769 1682 default:; 1770 1683 } 1771 1684 bounds.error = ERROR(parameter_unsupported); ··· 1815 1710 case ZSTD_d_refMultipleDDicts: 1816 1711 *value = (int)dctx->refMultipleDDicts; 1817 1712 return 0; 1713 + case ZSTD_d_disableHuffmanAssembly: 1714 + *value = (int)dctx->disableHufAsm; 1715 + return 0; 1716 + case ZSTD_d_maxBlockSize: 1717 + *value = dctx->maxBlockSizeParam; 1718 + return 0; 1818 1719 default:; 1819 1720 } 1820 1721 RETURN_ERROR(parameter_unsupported, ""); ··· 1854 1743 } 1855 1744 dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value; 1856 1745 return 0; 1746 + case ZSTD_d_disableHuffmanAssembly: 1747 + CHECK_DBOUNDS(ZSTD_d_disableHuffmanAssembly, value); 1748 + dctx->disableHufAsm = value != 0; 1749 + return 0; 1750 + case ZSTD_d_maxBlockSize: 1751 + if (value != 0) CHECK_DBOUNDS(ZSTD_d_maxBlockSize, value); 1752 + dctx->maxBlockSizeParam = value; 1753 + return 0; 1857 1754 default:; 1858 1755 } 1859 1756 RETURN_ERROR(parameter_unsupported, ""); ··· 1873 1754 || (reset == ZSTD_reset_session_and_parameters) ) { 1874 1755 dctx->streamStage = zdss_init; 1875 1756 dctx->noForwardProgress = 0; 1757 + dctx->isFrameDecompression = 1; 1876 1758 } 1877 1759 if ( (reset == ZSTD_reset_parameters) 1878 1760 || (reset == ZSTD_reset_session_and_parameters) ) { ··· 1890 1770 return ZSTD_sizeof_DCtx(dctx); 1891 1771 } 1892 1772 1893 - size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) 1773 + static size_t ZSTD_decodingBufferSize_internal(unsigned long long windowSize, unsigned long long frameContentSize, size_t blockSizeMax) 1894 1774 { 1895 - size_t const blockSize = 
(size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); 1896 - /* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy*/ 1897 - unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2); 1775 + size_t const blockSize = MIN((size_t)MIN(windowSize, ZSTD_BLOCKSIZE_MAX), blockSizeMax); 1776 + /* We need blockSize + WILDCOPY_OVERLENGTH worth of buffer so that if a block 1777 + * ends at windowSize + WILDCOPY_OVERLENGTH + 1 bytes, we can start writing 1778 + * the block at the beginning of the output buffer, and maintain a full window. 1779 + * 1780 + * We need another blockSize worth of buffer so that we can store split 1781 + * literals at the end of the block without overwriting the extDict window. 1782 + */ 1783 + unsigned long long const neededRBSize = windowSize + (blockSize * 2) + (WILDCOPY_OVERLENGTH * 2); 1898 1784 unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); 1899 1785 size_t const minRBSize = (size_t) neededSize; 1900 1786 RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, 1901 1787 frameParameter_windowTooLarge, ""); 1902 1788 return minRBSize; 1789 + } 1790 + 1791 + size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) 1792 + { 1793 + return ZSTD_decodingBufferSize_internal(windowSize, frameContentSize, ZSTD_BLOCKSIZE_MAX); 1903 1794 } 1904 1795 1905 1796 size_t ZSTD_estimateDStreamSize(size_t windowSize) ··· 1924 1793 size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) 1925 1794 { 1926 1795 U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */ 1927 - ZSTD_frameHeader zfh; 1796 + ZSTD_FrameHeader zfh; 1928 1797 size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); 1929 1798 if (ZSTD_isError(err)) 
return err; 1930 1799 RETURN_ERROR_IF(err>0, srcSize_wrong, ""); ··· 2019 1888 U32 someMoreWork = 1; 2020 1889 2021 1890 DEBUGLOG(5, "ZSTD_decompressStream"); 1891 + assert(zds != NULL); 2022 1892 RETURN_ERROR_IF( 2023 1893 input->pos > input->size, 2024 1894 srcSize_wrong, ··· 2050 1918 if (zds->refMultipleDDicts && zds->ddictSet) { 2051 1919 ZSTD_DCtx_selectFrameDDict(zds); 2052 1920 } 2053 - DEBUGLOG(5, "header size : %u", (U32)hSize); 2054 1921 if (ZSTD_isError(hSize)) { 2055 1922 return hSize; /* error */ 2056 1923 } ··· 2063 1932 zds->lhSize += remainingInput; 2064 1933 } 2065 1934 input->pos = input->size; 1935 + /* check first few bytes */ 1936 + FORWARD_IF_ERROR( 1937 + ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format), 1938 + "First few bytes detected incorrect" ); 1939 + /* return hint input size */ 2066 1940 return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ 2067 1941 } 2068 1942 assert(ip != NULL); ··· 2079 1943 if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN 2080 1944 && zds->fParams.frameType != ZSTD_skippableFrame 2081 1945 && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { 2082 - size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart)); 1946 + size_t const cSize = ZSTD_findFrameCompressedSize_advanced(istart, (size_t)(iend-istart), zds->format); 2083 1947 if (cSize <= (size_t)(iend-istart)) { 2084 1948 /* shortcut : using single-pass mode */ 2085 1949 size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); 2086 1950 if (ZSTD_isError(decompressedSize)) return decompressedSize; 2087 - DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") 1951 + DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()"); 1952 + assert(istart != NULL); 2088 1953 ip = istart + cSize; 2089 - op 
+= decompressedSize; 1954 + op = op ? op + decompressedSize : op; /* can occur if frameContentSize = 0 (empty frame) */ 2090 1955 zds->expected = 0; 2091 1956 zds->streamStage = zdss_init; 2092 1957 someMoreWork = 0; ··· 2106 1969 DEBUGLOG(4, "Consume header"); 2107 1970 FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); 2108 1971 2109 - if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ 1972 + if (zds->format == ZSTD_f_zstd1 1973 + && (MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ 2110 1974 zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); 2111 1975 zds->stage = ZSTDds_skipFrame; 2112 1976 } else { ··· 2123 1985 zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); 2124 1986 RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, 2125 1987 frameParameter_windowTooLarge, ""); 1988 + if (zds->maxBlockSizeParam != 0) 1989 + zds->fParams.blockSizeMax = MIN(zds->fParams.blockSizeMax, (unsigned)zds->maxBlockSizeParam); 2126 1990 2127 1991 /* Adapt buffer sizes to frame header instructions */ 2128 1992 { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); 2129 1993 size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered 2130 - ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) 1994 + ? 
ZSTD_decodingBufferSize_internal(zds->fParams.windowSize, zds->fParams.frameContentSize, zds->fParams.blockSizeMax) 2131 1995 : 0; 2132 1996 2133 1997 ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); ··· 2174 2034 } 2175 2035 if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ 2176 2036 FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); 2037 + assert(ip != NULL); 2177 2038 ip += neededInSize; 2178 2039 /* Function modifies the stage so we must break */ 2179 2040 break; ··· 2189 2048 int const isSkipFrame = ZSTD_isSkipFrame(zds); 2190 2049 size_t loadedSize; 2191 2050 /* At this point we shouldn't be decompressing a block that we can stream. */ 2192 - assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip)); 2051 + assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip))); 2193 2052 if (isSkipFrame) { 2194 2053 loadedSize = MIN(toLoad, (size_t)(iend-ip)); 2195 2054 } else { ··· 2198 2057 "should never happen"); 2199 2058 loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip)); 2200 2059 } 2201 - ip += loadedSize; 2202 - zds->inPos += loadedSize; 2060 + if (loadedSize != 0) { 2061 + /* ip may be NULL */ 2062 + ip += loadedSize; 2063 + zds->inPos += loadedSize; 2064 + } 2203 2065 if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ 2204 2066 2205 2067 /* decode loaded input */ ··· 2212 2068 break; 2213 2069 } 2214 2070 case zdss_flush: 2215 - { size_t const toFlushSize = zds->outEnd - zds->outStart; 2071 + { 2072 + size_t const toFlushSize = zds->outEnd - zds->outStart; 2216 2073 size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize); 2217 - op += flushedSize; 2074 + 2075 + op = op ? 
op + flushedSize : op; 2076 + 2218 2077 zds->outStart += flushedSize; 2219 2078 if (flushedSize == toFlushSize) { /* flush completed */ 2220 2079 zds->streamStage = zdss_read; 2221 2080 if ( (zds->outBuffSize < zds->fParams.frameContentSize) 2222 - && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { 2081 + && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { 2223 2082 DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)", 2224 2083 (int)(zds->outBuffSize - zds->outStart), 2225 2084 (U32)zds->fParams.blockSizeMax); ··· 2236 2089 2237 2090 default: 2238 2091 assert(0); /* impossible */ 2239 - RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ 2092 + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compilers require default to do something */ 2240 2093 } } 2241 2094 2242 2095 /* result */ ··· 2249 2102 if ((ip==istart) && (op==ostart)) { /* no forward progress */ 2250 2103 zds->noForwardProgress ++; 2251 2104 if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { 2252 - RETURN_ERROR_IF(op==oend, dstSize_tooSmall, ""); 2253 - RETURN_ERROR_IF(ip==iend, srcSize_wrong, ""); 2105 + RETURN_ERROR_IF(op==oend, noForwardProgress_destFull, ""); 2106 + RETURN_ERROR_IF(ip==iend, noForwardProgress_inputEmpty, ""); 2254 2107 assert(0); 2255 2108 } 2256 2109 } else { ··· 2287 2140 void* dst, size_t dstCapacity, size_t* dstPos, 2288 2141 const void* src, size_t srcSize, size_t* srcPos) 2289 2142 { 2290 - ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; 2291 - ZSTD_inBuffer input = { src, srcSize, *srcPos }; 2292 - /* ZSTD_compress_generic() will check validity of dstPos and srcPos */ 2293 - size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); 2294 - *dstPos = output.pos; 2295 - *srcPos = input.pos; 2296 - return cErr; 2143 + ZSTD_outBuffer output; 2144 + ZSTD_inBuffer input; 2145 + output.dst = dst; 2146 + output.size = dstCapacity; 2147 + output.pos = 
*dstPos; 2148 + input.src = src; 2149 + input.size = srcSize; 2150 + input.pos = *srcPos; 2151 + { size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); 2152 + *dstPos = output.pos; 2153 + *srcPos = input.pos; 2154 + return cErr; 2155 + } 2297 2156 }
+431 -293
lib/zstd/decompress/zstd_decompress_block.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 21 20 #include "../common/mem.h" /* low level memory routines */ 22 21 #define FSE_STATIC_LINKING_ONLY 23 22 #include "../common/fse.h" 24 - #define HUF_STATIC_LINKING_ONLY 25 23 #include "../common/huf.h" 26 24 #include "../common/zstd_internal.h" 27 25 #include "zstd_decompress_internal.h" /* ZSTD_DCtx */ 28 26 #include "zstd_ddict.h" /* ZSTD_DDictDictContent */ 29 27 #include "zstd_decompress_block.h" 28 + #include "../common/bits.h" /* ZSTD_highbit32 */ 30 29 31 30 /*_******************************************************* 32 31 * Macros ··· 52 51 * Block decoding 53 52 ***************************************************************/ 54 53 54 + static size_t ZSTD_blockSizeMax(ZSTD_DCtx const* dctx) 55 + { 56 + size_t const blockSizeMax = dctx->isFrameDecompression ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX; 57 + assert(blockSizeMax <= ZSTD_BLOCKSIZE_MAX); 58 + return blockSizeMax; 59 + } 60 + 55 61 /*! 
ZSTD_getcBlockSize() : 56 62 * Provides the size of compressed block from block header `src` */ 57 63 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, ··· 81 73 static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize, 82 74 const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately) 83 75 { 84 - if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) 85 - { 86 - /* room for litbuffer to fit without read faulting */ 87 - dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH; 76 + size_t const blockSizeMax = ZSTD_blockSizeMax(dctx); 77 + assert(litSize <= blockSizeMax); 78 + assert(dctx->isFrameDecompression || streaming == not_streaming); 79 + assert(expectedWriteSize <= blockSizeMax); 80 + if (streaming == not_streaming && dstCapacity > blockSizeMax + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) { 81 + /* If we aren't streaming, we can just put the literals after the output 82 + * of the current block. We don't need to worry about overwriting the 83 + * extDict of our window, because it doesn't exist. 84 + * So if we have space after the end of the block, just put it there. 85 + */ 86 + dctx->litBuffer = (BYTE*)dst + blockSizeMax + WILDCOPY_OVERLENGTH; 88 87 dctx->litBufferEnd = dctx->litBuffer + litSize; 89 88 dctx->litBufferLocation = ZSTD_in_dst; 90 - } 91 - else if (litSize > ZSTD_LITBUFFEREXTRASIZE) 92 - { 93 - /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */ 89 + } else if (litSize <= ZSTD_LITBUFFEREXTRASIZE) { 90 + /* Literals fit entirely within the extra buffer, put them there to avoid 91 + * having to split the literals. 
92 + */ 93 + dctx->litBuffer = dctx->litExtraBuffer; 94 + dctx->litBufferEnd = dctx->litBuffer + litSize; 95 + dctx->litBufferLocation = ZSTD_not_in_dst; 96 + } else { 97 + assert(blockSizeMax > ZSTD_LITBUFFEREXTRASIZE); 98 + /* Literals must be split between the output block and the extra lit 99 + * buffer. We fill the extra lit buffer with the tail of the literals, 100 + * and put the rest of the literals at the end of the block, with 101 + * WILDCOPY_OVERLENGTH of buffer room to allow for overreads. 102 + * This MUST not write more than our maxBlockSize beyond dst, because in 103 + * streaming mode, that could overwrite part of our extDict window. 104 + */ 94 105 if (splitImmediately) { 95 106 /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */ 96 107 dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH; 97 108 dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE; 98 - } 99 - else { 100 - /* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */ 109 + } else { 110 + /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */ 101 111 dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize; 102 112 dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize; 103 113 } 104 114 dctx->litBufferLocation = ZSTD_split; 105 - } 106 - else 107 - { 108 - /* fits entirely within litExtraBuffer, so no split is necessary */ 109 - dctx->litBuffer = dctx->litExtraBuffer; 110 - dctx->litBufferEnd = dctx->litBuffer + litSize; 111 - dctx->litBufferLocation = ZSTD_not_in_dst; 115 + assert(dctx->litBufferEnd <= (BYTE*)dst + expectedWriteSize); 112 116 } 113 117 } 114 118 115 - /* Hidden declaration for fullbench */ 116 - size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, 117 - const void* src, size_t srcSize, 118 - void* dst, size_t 
dstCapacity, const streaming_operation streaming); 119 119 /*! ZSTD_decodeLiteralsBlock() : 120 120 * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored 121 121 * in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current ··· 132 116 * 133 117 * @return : nb of bytes read from src (< srcSize ) 134 118 * note : symbol not declared but exposed for fullbench */ 135 - size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, 119 + static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, 136 120 const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */ 137 121 void* dst, size_t dstCapacity, const streaming_operation streaming) 138 122 { ··· 140 124 RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); 141 125 142 126 { const BYTE* const istart = (const BYTE*) src; 143 - symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); 127 + SymbolEncodingType_e const litEncType = (SymbolEncodingType_e)(istart[0] & 3); 128 + size_t const blockSizeMax = ZSTD_blockSizeMax(dctx); 144 129 145 130 switch(litEncType) 146 131 { ··· 151 134 ZSTD_FALLTHROUGH; 152 135 153 136 case set_compressed: 154 - RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); 137 + RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3"); 155 138 { size_t lhSize, litSize, litCSize; 156 139 U32 singleStream=0; 157 140 U32 const lhlCode = (istart[0] >> 2) & 3; 158 141 U32 const lhc = MEM_readLE32(istart); 159 142 size_t hufSuccess; 160 - size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); 143 + size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); 144 + int const flags = 0 145 + | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0) 146 + | (dctx->disableHufAsm ? 
HUF_flags_disableAsm : 0); 161 147 switch(lhlCode) 162 148 { 163 149 case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ ··· 184 164 break; 185 165 } 186 166 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); 187 - RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); 167 + RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); 168 + if (!singleStream) 169 + RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong, 170 + "Not enough literals (%zu) for the 4-streams mode (min %u)", 171 + litSize, MIN_LITERALS_FOR_4_STREAMS); 188 172 RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); 189 173 RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, ""); 190 174 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0); ··· 200 176 201 177 if (litEncType==set_repeat) { 202 178 if (singleStream) { 203 - hufSuccess = HUF_decompress1X_usingDTable_bmi2( 179 + hufSuccess = HUF_decompress1X_usingDTable( 204 180 dctx->litBuffer, litSize, istart+lhSize, litCSize, 205 - dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx)); 181 + dctx->HUFptr, flags); 206 182 } else { 207 - hufSuccess = HUF_decompress4X_usingDTable_bmi2( 183 + assert(litSize >= MIN_LITERALS_FOR_4_STREAMS); 184 + hufSuccess = HUF_decompress4X_usingDTable( 208 185 dctx->litBuffer, litSize, istart+lhSize, litCSize, 209 - dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx)); 186 + dctx->HUFptr, flags); 210 187 } 211 188 } else { 212 189 if (singleStream) { ··· 215 190 hufSuccess = HUF_decompress1X_DCtx_wksp( 216 191 dctx->entropy.hufTable, dctx->litBuffer, litSize, 217 192 istart+lhSize, litCSize, dctx->workspace, 218 - sizeof(dctx->workspace)); 193 + sizeof(dctx->workspace), flags); 219 194 #else 220 - hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( 195 + hufSuccess = HUF_decompress1X1_DCtx_wksp( 221 196 dctx->entropy.hufTable, dctx->litBuffer, litSize, 222 197 
istart+lhSize, litCSize, dctx->workspace, 223 - sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); 198 + sizeof(dctx->workspace), flags); 224 199 #endif 225 200 } else { 226 - hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( 201 + hufSuccess = HUF_decompress4X_hufOnly_wksp( 227 202 dctx->entropy.hufTable, dctx->litBuffer, litSize, 228 203 istart+lhSize, litCSize, dctx->workspace, 229 - sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); 204 + sizeof(dctx->workspace), flags); 230 205 } 231 206 } 232 207 if (dctx->litBufferLocation == ZSTD_split) 233 208 { 209 + assert(litSize > ZSTD_LITBUFFEREXTRASIZE); 234 210 ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE); 235 211 ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE); 236 212 dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH; 237 213 dctx->litBufferEnd -= WILDCOPY_OVERLENGTH; 214 + assert(dctx->litBufferEnd <= (BYTE*)dst + blockSizeMax); 238 215 } 239 216 240 217 RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); ··· 251 224 case set_basic: 252 225 { size_t litSize, lhSize; 253 226 U32 const lhlCode = ((istart[0]) >> 2) & 3; 254 - size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); 227 + size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); 255 228 switch(lhlCode) 256 229 { 257 230 case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ ··· 264 237 break; 265 238 case 3: 266 239 lhSize = 3; 240 + RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3"); 267 241 litSize = MEM_readLE24(istart) >> 4; 268 242 break; 269 243 } 270 244 271 245 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); 246 + RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); 272 247 RETURN_ERROR_IF(expectedWriteSize < litSize, 
dstSize_tooSmall, ""); 273 248 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1); 274 249 if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ ··· 299 270 case set_rle: 300 271 { U32 const lhlCode = ((istart[0]) >> 2) & 3; 301 272 size_t litSize, lhSize; 302 - size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); 273 + size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); 303 274 switch(lhlCode) 304 275 { 305 276 case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ ··· 308 279 break; 309 280 case 1: 310 281 lhSize = 2; 282 + RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3"); 311 283 litSize = MEM_readLE16(istart) >> 4; 312 284 break; 313 285 case 3: 314 286 lhSize = 3; 287 + RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4"); 315 288 litSize = MEM_readLE24(istart) >> 4; 316 - RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); 317 289 break; 318 290 } 319 291 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); 320 - RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); 292 + RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); 321 293 RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, ""); 322 294 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1); 323 295 if (dctx->litBufferLocation == ZSTD_split) ··· 340 310 } 341 311 } 342 312 313 + /* Hidden declaration for fullbench */ 314 + size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx, 315 + const void* src, size_t srcSize, 316 + void* dst, size_t dstCapacity); 317 + size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx, 318 + const void* src, size_t srcSize, 319 + void* dst, size_t 
dstCapacity) 320 + { 321 + dctx->isFrameDecompression = 0; 322 + return ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, not_streaming); 323 + } 324 + 343 325 /* Default FSE distribution tables. 344 326 * These are pre-calculated FSE decoding tables using default distributions as defined in specification : 345 327 * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions ··· 359 317 * - start from default distributions, present in /lib/common/zstd_internal.h 360 318 * - generate tables normally, using ZSTD_buildFSETable() 361 319 * - printout the content of tables 362 - * - pretify output, report below, test with fuzzer to ensure it's correct */ 320 + * - prettify output, report below, test with fuzzer to ensure it's correct */ 363 321 364 322 /* Default FSE distribution table for Literal Lengths */ 365 323 static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = { ··· 548 506 for (i = 8; i < n; i += 8) { 549 507 MEM_write64(spread + pos + i, sv); 550 508 } 551 - pos += n; 509 + assert(n>=0); 510 + pos += (size_t)n; 552 511 } 553 512 } 554 513 /* Now we spread those positions across the table. 555 - * The benefit of doing it in two stages is that we avoid the the 514 + * The benefit of doing it in two stages is that we avoid the 556 515 * variable size inner loop, which caused lots of branch misses. 557 516 * Now we can run through all the positions without any branch misses. 558 - * We unroll the loop twice, since that is what emperically worked best. 517 + * We unroll the loop twice, since that is what empirically worked best. 
559 518 */ 560 519 { 561 520 size_t position = 0; ··· 583 540 for (i=0; i<n; i++) { 584 541 tableDecode[position].baseValue = s; 585 542 position = (position + step) & tableMask; 586 - while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */ 543 + while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask; /* lowprob area */ 587 544 } } 588 545 assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ 589 546 } ··· 594 551 for (u=0; u<tableSize; u++) { 595 552 U32 const symbol = tableDecode[u].baseValue; 596 553 U32 const nextState = symbolNext[symbol]++; 597 - tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) ); 554 + tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) ); 598 555 tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize); 599 556 assert(nbAdditionalBits[symbol] < 255); 600 557 tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol]; ··· 646 603 * @return : nb bytes read from src, 647 604 * or an error code if it fails */ 648 605 static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr, 649 - symbolEncodingType_e type, unsigned max, U32 maxLog, 606 + SymbolEncodingType_e type, unsigned max, U32 maxLog, 650 607 const void* src, size_t srcSize, 651 608 const U32* baseValue, const U8* nbAdditionalBits, 652 609 const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable, ··· 707 664 708 665 /* SeqHead */ 709 666 nbSeq = *ip++; 710 - if (!nbSeq) { 711 - *nbSeqPtr=0; 712 - RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, ""); 713 - return 1; 714 - } 715 667 if (nbSeq > 0x7F) { 716 668 if (nbSeq == 0xFF) { 717 669 RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); ··· 719 681 } 720 682 *nbSeqPtr = nbSeq; 721 683 684 + if (nbSeq == 0) { 685 + /* No sequence : section ends immediately */ 686 + RETURN_ERROR_IF(ip != iend, corruption_detected, 687 + "extraneous 
data present in the Sequences section"); 688 + return (size_t)(ip - istart); 689 + } 690 + 722 691 /* FSE table descriptors */ 723 692 RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ 724 - { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); 725 - symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); 726 - symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); 693 + RETURN_ERROR_IF(*ip & 3, corruption_detected, ""); /* The last field, Reserved, must be all-zeroes. */ 694 + { SymbolEncodingType_e const LLtype = (SymbolEncodingType_e)(*ip >> 6); 695 + SymbolEncodingType_e const OFtype = (SymbolEncodingType_e)((*ip >> 4) & 3); 696 + SymbolEncodingType_e const MLtype = (SymbolEncodingType_e)((*ip >> 2) & 3); 727 697 ip++; 728 698 729 699 /* Build DTables */ ··· 875 829 /* ZSTD_safecopyDstBeforeSrc(): 876 830 * This version allows overlap with dst before src, or handles the non-overlap case with dst after src 877 831 * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */ 878 - static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) { 832 + static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length) { 879 833 ptrdiff_t const diff = op - ip; 880 834 BYTE* const oend = op + length; 881 835 ··· 904 858 * to be optimized for many small sequences, since those fall into ZSTD_execSequence(). 905 859 */ 906 860 FORCE_NOINLINE 861 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 907 862 size_t ZSTD_execSequenceEnd(BYTE* op, 908 863 BYTE* const oend, seq_t sequence, 909 864 const BYTE** litPtr, const BYTE* const litLimit, ··· 952 905 * This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case. 
953 906 */ 954 907 FORCE_NOINLINE 908 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 955 909 size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op, 956 910 BYTE* const oend, const BYTE* const oend_w, seq_t sequence, 957 911 const BYTE** litPtr, const BYTE* const litLimit, ··· 998 950 } 999 951 1000 952 HINT_INLINE 953 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 1001 954 size_t ZSTD_execSequence(BYTE* op, 1002 955 BYTE* const oend, seq_t sequence, 1003 956 const BYTE** litPtr, const BYTE* const litLimit, ··· 1013 964 1014 965 assert(op != NULL /* Precondition */); 1015 966 assert(oend_w < oend /* No underflow */); 967 + 968 + #if defined(__aarch64__) 969 + /* prefetch sequence starting from match that will be used for copy later */ 970 + PREFETCH_L1(match); 971 + #endif 1016 972 /* Handle edge cases in a slow path: 1017 973 * - Read beyond end of literals 1018 974 * - Match end is within WILDCOPY_OVERLIMIT of oend ··· 1097 1043 } 1098 1044 1099 1045 HINT_INLINE 1046 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 1100 1047 size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op, 1101 1048 BYTE* const oend, const BYTE* const oend_w, seq_t sequence, 1102 1049 const BYTE** litPtr, const BYTE* const litLimit, ··· 1209 1154 } 1210 1155 1211 1156 /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum 1212 - * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) 1157 + * offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32 1213 1158 * bits before reloading. This value is the maximum number of bytes we read 1214 1159 * after reloading when we are decoding long offsets. 
1215 1160 */ ··· 1220 1165 1221 1166 typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; 1222 1167 1168 + /* 1169 + * ZSTD_decodeSequence(): 1170 + * @p longOffsets : tells the decoder to reload more bit while decoding large offsets 1171 + * only used in 32-bit mode 1172 + * @return : Sequence (litL + matchL + offset) 1173 + */ 1223 1174 FORCE_INLINE_TEMPLATE seq_t 1224 - ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) 1175 + ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq) 1225 1176 { 1226 1177 seq_t seq; 1178 + /* 1179 + * ZSTD_seqSymbol is a 64 bits wide structure. 1180 + * It can be loaded in one operation 1181 + * and its fields extracted by simply shifting or bit-extracting on aarch64. 1182 + * GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh 1183 + * operations that cause performance drop. This can be avoided by using this 1184 + * ZSTD_memcpy hack. 
1185 + */ 1186 + #if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__)) 1187 + ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS; 1188 + ZSTD_seqSymbol* const llDInfo = &llDInfoS; 1189 + ZSTD_seqSymbol* const mlDInfo = &mlDInfoS; 1190 + ZSTD_seqSymbol* const ofDInfo = &ofDInfoS; 1191 + ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol)); 1192 + ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol)); 1193 + ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol)); 1194 + #else 1227 1195 const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state; 1228 1196 const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state; 1229 1197 const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state; 1198 + #endif 1230 1199 seq.matchLength = mlDInfo->baseValue; 1231 1200 seq.litLength = llDInfo->baseValue; 1232 1201 { U32 const ofBase = ofDInfo->baseValue; ··· 1265 1186 U32 const llnbBits = llDInfo->nbBits; 1266 1187 U32 const mlnbBits = mlDInfo->nbBits; 1267 1188 U32 const ofnbBits = ofDInfo->nbBits; 1189 + 1190 + assert(llBits <= MaxLLBits); 1191 + assert(mlBits <= MaxMLBits); 1192 + assert(ofBits <= MaxOff); 1268 1193 /* 1269 1194 * As gcc has better branch and block analyzers, sometimes it is only 1270 - * valuable to mark likelyness for clang, it gives around 3-4% of 1195 + * valuable to mark likeliness for clang, it gives around 3-4% of 1271 1196 * performance. 
1272 1197 */ 1273 1198 1274 1199 /* sequence */ 1275 1200 { size_t offset; 1276 - #if defined(__clang__) 1277 - if (LIKELY(ofBits > 1)) { 1278 - #else 1279 1201 if (ofBits > 1) { 1280 - #endif 1281 1202 ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); 1282 1203 ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); 1283 - assert(ofBits <= MaxOff); 1204 + ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32); 1205 + ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits); 1284 1206 if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { 1285 - U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); 1207 + /* Always read extra bits, this keeps the logic simple, 1208 + * avoids branches, and avoids accidentally reading 0 bits. 1209 + */ 1210 + U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32; 1286 1211 offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); 1287 1212 BIT_reloadDStream(&seqState->DStream); 1288 - if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); 1289 - assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ 1213 + offset += BIT_readBitsFast(&seqState->DStream, extraBits); 1290 1214 } else { 1291 1215 offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ 1292 1216 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); ··· 1306 1224 } else { 1307 1225 offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); 1308 1226 { size_t temp = (offset==3) ? 
seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; 1309 - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ 1227 + temp -= !temp; /* 0 is not valid: input corrupted => force offset to -1 => corruption detected at execSequence */ 1310 1228 if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; 1311 1229 seqState->prevOffset[1] = seqState->prevOffset[0]; 1312 1230 seqState->prevOffset[0] = offset = temp; ··· 1314 1232 seq.offset = offset; 1315 1233 } 1316 1234 1317 - #if defined(__clang__) 1318 - if (UNLIKELY(mlBits > 0)) 1319 - #else 1320 1235 if (mlBits > 0) 1321 - #endif 1322 1236 seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); 1323 1237 1324 1238 if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) ··· 1324 1246 /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ 1325 1247 ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); 1326 1248 1327 - #if defined(__clang__) 1328 - if (UNLIKELY(llBits > 0)) 1329 - #else 1330 1249 if (llBits > 0) 1331 - #endif 1332 1250 seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); 1333 1251 1334 1252 if (MEM_32bits()) ··· 1333 1259 DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", 1334 1260 (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); 1335 1261 1336 - ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */ 1337 - ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */ 1338 - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ 1339 - ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */ 1262 + if (!isLastSeq) { 1263 + /* don't update FSE state for last Sequence */ 1264 + ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */ 
1265 + ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */ 1266 + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ 1267 + ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */ 1268 + BIT_reloadDStream(&seqState->DStream); 1269 + } 1340 1270 } 1341 1271 1342 1272 return seq; 1343 1273 } 1344 1274 1345 - #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION 1346 - MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) 1275 + #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) 1276 + #if DEBUGLEVEL >= 1 1277 + static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) 1347 1278 { 1348 1279 size_t const windowSize = dctx->fParams.windowSize; 1349 1280 /* No dictionary used. */ ··· 1362 1283 /* Dictionary is active. */ 1363 1284 return 1; 1364 1285 } 1286 + #endif 1365 1287 1366 - MEM_STATIC void ZSTD_assertValidSequence( 1288 + static void ZSTD_assertValidSequence( 1367 1289 ZSTD_DCtx const* dctx, 1368 1290 BYTE const* op, BYTE const* oend, 1369 1291 seq_t const seq, 1370 1292 BYTE const* prefixStart, BYTE const* virtualStart) 1371 1293 { 1372 1294 #if DEBUGLEVEL >= 1 1373 - size_t const windowSize = dctx->fParams.windowSize; 1374 - size_t const sequenceSize = seq.litLength + seq.matchLength; 1375 - BYTE const* const oLitEnd = op + seq.litLength; 1376 - DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", 1377 - (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); 1378 - assert(op <= oend); 1379 - assert((size_t)(oend - op) >= sequenceSize); 1380 - assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); 1381 - if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { 1382 - size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); 1383 
- /* Offset must be within the dictionary. */ 1384 - assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); 1385 - assert(seq.offset <= windowSize + dictSize); 1386 - } else { 1387 - /* Offset must be within our window. */ 1388 - assert(seq.offset <= windowSize); 1295 + if (dctx->isFrameDecompression) { 1296 + size_t const windowSize = dctx->fParams.windowSize; 1297 + size_t const sequenceSize = seq.litLength + seq.matchLength; 1298 + BYTE const* const oLitEnd = op + seq.litLength; 1299 + DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", 1300 + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); 1301 + assert(op <= oend); 1302 + assert((size_t)(oend - op) >= sequenceSize); 1303 + assert(sequenceSize <= ZSTD_blockSizeMax(dctx)); 1304 + if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { 1305 + size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); 1306 + /* Offset must be within the dictionary. */ 1307 + assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); 1308 + assert(seq.offset <= windowSize + dictSize); 1309 + } else { 1310 + /* Offset must be within our window. 
*/ 1311 + assert(seq.offset <= windowSize); 1312 + } 1389 1313 } 1390 1314 #else 1391 1315 (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart; ··· 1404 1322 ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, 1405 1323 void* dst, size_t maxDstSize, 1406 1324 const void* seqStart, size_t seqSize, int nbSeq, 1407 - const ZSTD_longOffset_e isLongOffset, 1408 - const int frame) 1325 + const ZSTD_longOffset_e isLongOffset) 1409 1326 { 1410 1327 const BYTE* ip = (const BYTE*)seqStart; 1411 1328 const BYTE* const iend = ip + seqSize; 1412 1329 BYTE* const ostart = (BYTE*)dst; 1413 - BYTE* const oend = ostart + maxDstSize; 1330 + BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, maxDstSize); 1414 1331 BYTE* op = ostart; 1415 1332 const BYTE* litPtr = dctx->litPtr; 1416 1333 const BYTE* litBufferEnd = dctx->litBufferEnd; 1417 1334 const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); 1418 1335 const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); 1419 1336 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); 1420 - DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer"); 1421 - (void)frame; 1337 + DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq); 1422 1338 1423 - /* Regen sequences */ 1339 + /* Literals are split between internal buffer & output buffer */ 1424 1340 if (nbSeq) { 1425 1341 seqState_t seqState; 1426 1342 dctx->fseEntropy = 1; ··· 1437 1357 BIT_DStream_completed < BIT_DStream_overflow); 1438 1358 1439 1359 /* decompress without overrunning litPtr begins */ 1440 - { 1441 - seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset); 1360 + { seq_t sequence = {0,0,0}; /* some static analyzer believe that @sequence is not initialized (it necessarily is, since for(;;) loop as at least one iteration) */ 1442 1361 /* Align the decompression loop to 32 + 16 bytes. 
1443 1362 * 1444 1363 * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression ··· 1499 1420 #endif 1500 1421 1501 1422 /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */ 1502 - for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) { 1503 - size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); 1423 + for ( ; nbSeq; nbSeq--) { 1424 + sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); 1425 + if (litPtr + sequence.litLength > dctx->litBufferEnd) break; 1426 + { size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); 1504 1427 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) 1505 - assert(!ZSTD_isError(oneSeqSize)); 1506 - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); 1428 + assert(!ZSTD_isError(oneSeqSize)); 1429 + ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); 1507 1430 #endif 1508 - if (UNLIKELY(ZSTD_isError(oneSeqSize))) 1509 - return oneSeqSize; 1510 - DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); 1511 - op += oneSeqSize; 1512 - if (UNLIKELY(!--nbSeq)) 1513 - break; 1514 - BIT_reloadDStream(&(seqState.DStream)); 1515 - sequence = ZSTD_decodeSequence(&seqState, isLongOffset); 1516 - } 1431 + if (UNLIKELY(ZSTD_isError(oneSeqSize))) 1432 + return oneSeqSize; 1433 + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); 1434 + op += oneSeqSize; 1435 + } } 1436 + DEBUGLOG(6, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)"); 1517 1437 1518 1438 /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */ 
1519 1439 if (nbSeq > 0) { 1520 1440 const size_t leftoverLit = dctx->litBufferEnd - litPtr; 1521 - if (leftoverLit) 1522 - { 1441 + DEBUGLOG(6, "There are %i sequences left, and %zu/%zu literals left in buffer", nbSeq, leftoverLit, sequence.litLength); 1442 + if (leftoverLit) { 1523 1443 RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer"); 1524 1444 ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); 1525 1445 sequence.litLength -= leftoverLit; ··· 1527 1449 litPtr = dctx->litExtraBuffer; 1528 1450 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; 1529 1451 dctx->litBufferLocation = ZSTD_not_in_dst; 1530 - { 1531 - size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); 1452 + { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); 1532 1453 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) 1533 1454 assert(!ZSTD_isError(oneSeqSize)); 1534 - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); 1455 + ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); 1535 1456 #endif 1536 1457 if (UNLIKELY(ZSTD_isError(oneSeqSize))) 1537 1458 return oneSeqSize; 1538 1459 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); 1539 1460 op += oneSeqSize; 1540 - if (--nbSeq) 1541 - BIT_reloadDStream(&(seqState.DStream)); 1542 1461 } 1462 + nbSeq--; 1543 1463 } 1544 1464 } 1545 1465 1546 - if (nbSeq > 0) /* there is remaining lit from extra buffer */ 1547 - { 1466 + if (nbSeq > 0) { 1467 + /* there is remaining lit from extra buffer */ 1548 1468 1549 1469 #if defined(__x86_64__) 1550 1470 __asm__(".p2align 6"); ··· 1561 1485 # endif 1562 1486 #endif 1563 1487 1564 - for (; ; ) { 1565 - seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); 1488 + for ( ; nbSeq ; nbSeq--) { 
1489 + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); 1566 1490 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); 1567 1491 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) 1568 1492 assert(!ZSTD_isError(oneSeqSize)); 1569 - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); 1493 + ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); 1570 1494 #endif 1571 1495 if (UNLIKELY(ZSTD_isError(oneSeqSize))) 1572 1496 return oneSeqSize; 1573 1497 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); 1574 1498 op += oneSeqSize; 1575 - if (UNLIKELY(!--nbSeq)) 1576 - break; 1577 - BIT_reloadDStream(&(seqState.DStream)); 1578 1499 } 1579 1500 } 1580 1501 1581 1502 /* check if reached exact end */ 1582 1503 DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq); 1583 1504 RETURN_ERROR_IF(nbSeq, corruption_detected, ""); 1584 - RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); 1505 + DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed); 1506 + RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, ""); 1585 1507 /* save reps for next block */ 1586 1508 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); } 1587 1509 } 1588 1510 1589 1511 /* last literal segment */ 1590 - if (dctx->litBufferLocation == ZSTD_split) /* split hasn't been reached yet, first get dst then copy litExtraBuffer */ 1591 - { 1592 - size_t const lastLLSize = litBufferEnd - litPtr; 1512 + if (dctx->litBufferLocation == ZSTD_split) { 1513 + /* split hasn't been reached yet, first get dst then copy litExtraBuffer */ 1514 + size_t const lastLLSize = 
(size_t)(litBufferEnd - litPtr); 1515 + DEBUGLOG(6, "copy last literals from segment : %u", (U32)lastLLSize); 1593 1516 RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, ""); 1594 1517 if (op != NULL) { 1595 1518 ZSTD_memmove(op, litPtr, lastLLSize); ··· 1598 1523 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; 1599 1524 dctx->litBufferLocation = ZSTD_not_in_dst; 1600 1525 } 1601 - { size_t const lastLLSize = litBufferEnd - litPtr; 1526 + /* copy last literals from internal buffer */ 1527 + { size_t const lastLLSize = (size_t)(litBufferEnd - litPtr); 1528 + DEBUGLOG(6, "copy last literals from internal buffer : %u", (U32)lastLLSize); 1602 1529 RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); 1603 1530 if (op != NULL) { 1604 1531 ZSTD_memcpy(op, litPtr, lastLLSize); 1605 1532 op += lastLLSize; 1606 - } 1607 - } 1533 + } } 1608 1534 1609 - return op-ostart; 1535 + DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart)); 1536 + return (size_t)(op - ostart); 1610 1537 } 1611 1538 1612 1539 FORCE_INLINE_TEMPLATE size_t ··· 1616 1539 ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, 1617 1540 void* dst, size_t maxDstSize, 1618 1541 const void* seqStart, size_t seqSize, int nbSeq, 1619 - const ZSTD_longOffset_e isLongOffset, 1620 - const int frame) 1542 + const ZSTD_longOffset_e isLongOffset) 1621 1543 { 1622 1544 const BYTE* ip = (const BYTE*)seqStart; 1623 1545 const BYTE* const iend = ip + seqSize; 1624 1546 BYTE* const ostart = (BYTE*)dst; 1625 - BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer; 1547 + BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? 
ZSTD_maybeNullPtrAdd(ostart, maxDstSize) : dctx->litBuffer; 1626 1548 BYTE* op = ostart; 1627 1549 const BYTE* litPtr = dctx->litPtr; 1628 1550 const BYTE* const litEnd = litPtr + dctx->litSize; 1629 1551 const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart); 1630 1552 const BYTE* const vBase = (const BYTE*)(dctx->virtualStart); 1631 1553 const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd); 1632 - DEBUGLOG(5, "ZSTD_decompressSequences_body"); 1633 - (void)frame; 1554 + DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq); 1634 1555 1635 1556 /* Regen sequences */ 1636 1557 if (nbSeq) { ··· 1642 1567 ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); 1643 1568 ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); 1644 1569 assert(dst != NULL); 1645 - 1646 - ZSTD_STATIC_ASSERT( 1647 - BIT_DStream_unfinished < BIT_DStream_completed && 1648 - BIT_DStream_endOfBuffer < BIT_DStream_completed && 1649 - BIT_DStream_completed < BIT_DStream_overflow); 1650 1570 1651 1571 #if defined(__x86_64__) 1652 1572 __asm__(".p2align 6"); ··· 1657 1587 # endif 1658 1588 #endif 1659 1589 1660 - for ( ; ; ) { 1661 - seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); 1590 + for ( ; nbSeq ; nbSeq--) { 1591 + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); 1662 1592 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); 1663 1593 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) 1664 1594 assert(!ZSTD_isError(oneSeqSize)); 1665 - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); 1595 + ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); 1666 1596 #endif 1667 1597 if (UNLIKELY(ZSTD_isError(oneSeqSize))) 1668 1598 return oneSeqSize; 1669 1599 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); 1670 1600 op += 
oneSeqSize; 1671 - if (UNLIKELY(!--nbSeq)) 1672 - break; 1673 - BIT_reloadDStream(&(seqState.DStream)); 1674 1601 } 1675 1602 1676 1603 /* check if reached exact end */ 1677 - DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); 1678 - RETURN_ERROR_IF(nbSeq, corruption_detected, ""); 1679 - RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); 1604 + assert(nbSeq == 0); 1605 + RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, ""); 1680 1606 /* save reps for next block */ 1681 1607 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); } 1682 1608 } 1683 1609 1684 1610 /* last literal segment */ 1685 - { size_t const lastLLSize = litEnd - litPtr; 1611 + { size_t const lastLLSize = (size_t)(litEnd - litPtr); 1612 + DEBUGLOG(6, "copy last literals : %u", (U32)lastLLSize); 1686 1613 RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); 1687 1614 if (op != NULL) { 1688 1615 ZSTD_memcpy(op, litPtr, lastLLSize); 1689 1616 op += lastLLSize; 1690 - } 1691 - } 1617 + } } 1692 1618 1693 - return op-ostart; 1619 + DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart)); 1620 + return (size_t)(op - ostart); 1694 1621 } 1695 1622 1696 1623 static size_t 1697 1624 ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, 1698 1625 void* dst, size_t maxDstSize, 1699 1626 const void* seqStart, size_t seqSize, int nbSeq, 1700 - const ZSTD_longOffset_e isLongOffset, 1701 - const int frame) 1627 + const ZSTD_longOffset_e isLongOffset) 1702 1628 { 1703 - return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); 1629 + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); 1704 1630 } 1705 1631 1706 1632 static size_t 1707 1633 ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx, 1708 1634 void* dst, size_t 
maxDstSize, 1709 1635 const void* seqStart, size_t seqSize, int nbSeq, 1710 - const ZSTD_longOffset_e isLongOffset, 1711 - const int frame) 1636 + const ZSTD_longOffset_e isLongOffset) 1712 1637 { 1713 - return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); 1638 + return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); 1714 1639 } 1715 1640 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ 1716 1641 1717 1642 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT 1718 1643 1719 - FORCE_INLINE_TEMPLATE size_t 1720 - ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence, 1644 + FORCE_INLINE_TEMPLATE 1645 + 1646 + size_t ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence, 1721 1647 const BYTE* const prefixStart, const BYTE* const dictEnd) 1722 1648 { 1723 1649 prefetchPos += sequence.litLength; 1724 1650 { const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart; 1725 - const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. 1726 - * No consequence though : memory address is only used for prefetching, not for dereferencing */ 1651 + /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. 
1652 + * No consequence though : memory address is only used for prefetching, not for dereferencing */ 1653 + const BYTE* const match = ZSTD_wrappedPtrSub(ZSTD_wrappedPtrAdd(matchBase, prefetchPos), sequence.offset); 1727 1654 PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ 1728 1655 } 1729 1656 return prefetchPos + sequence.matchLength; ··· 1735 1668 ZSTD_DCtx* dctx, 1736 1669 void* dst, size_t maxDstSize, 1737 1670 const void* seqStart, size_t seqSize, int nbSeq, 1738 - const ZSTD_longOffset_e isLongOffset, 1739 - const int frame) 1671 + const ZSTD_longOffset_e isLongOffset) 1740 1672 { 1741 1673 const BYTE* ip = (const BYTE*)seqStart; 1742 1674 const BYTE* const iend = ip + seqSize; 1743 1675 BYTE* const ostart = (BYTE*)dst; 1744 - BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize; 1676 + BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? 
dctx->litBuffer : ZSTD_maybeNullPtrAdd(ostart, maxDstSize); 1745 1677 BYTE* op = ostart; 1746 1678 const BYTE* litPtr = dctx->litPtr; 1747 1679 const BYTE* litBufferEnd = dctx->litBufferEnd; 1748 1680 const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); 1749 1681 const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); 1750 1682 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); 1751 - (void)frame; 1752 1683 1753 1684 /* Regen sequences */ 1754 1685 if (nbSeq) { ··· 1771 1706 ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); 1772 1707 1773 1708 /* prepare in advance */ 1774 - for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) { 1775 - seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); 1709 + for (seqNb=0; seqNb<seqAdvance; seqNb++) { 1710 + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1); 1776 1711 prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); 1777 1712 sequences[seqNb] = sequence; 1778 1713 } 1779 - RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, ""); 1780 1714 1781 1715 /* decompress without stomping litBuffer */ 1782 - for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) { 1783 - seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset); 1784 - size_t oneSeqSize; 1716 + for (; seqNb < nbSeq; seqNb++) { 1717 + seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1); 1785 1718 1786 - if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) 1787 - { 1719 + if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) { 1788 1720 /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */ 
1789 1721 const size_t leftoverLit = dctx->litBufferEnd - litPtr; 1790 1722 if (leftoverLit) ··· 1794 1732 litPtr = dctx->litExtraBuffer; 1795 1733 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; 1796 1734 dctx->litBufferLocation = ZSTD_not_in_dst; 1797 - oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); 1735 + { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); 1798 1736 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) 1799 - assert(!ZSTD_isError(oneSeqSize)); 1800 - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); 1737 + assert(!ZSTD_isError(oneSeqSize)); 1738 + ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); 1801 1739 #endif 1802 - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; 1740 + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; 1803 1741 1804 - prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); 1805 - sequences[seqNb & STORED_SEQS_MASK] = sequence; 1806 - op += oneSeqSize; 1807 - } 1742 + prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); 1743 + sequences[seqNb & STORED_SEQS_MASK] = sequence; 1744 + op += oneSeqSize; 1745 + } } 1808 1746 else 1809 1747 { 1810 1748 /* lit buffer is either wholly contained in first or second split, or not split at all*/ 1811 - oneSeqSize = dctx->litBufferLocation == ZSTD_split ? 1749 + size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ? 
1812 1750 ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) : 1813 1751 ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); 1814 1752 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) 1815 1753 assert(!ZSTD_isError(oneSeqSize)); 1816 - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); 1754 + ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); 1817 1755 #endif 1818 1756 if (ZSTD_isError(oneSeqSize)) return oneSeqSize; 1819 1757 ··· 1822 1760 op += oneSeqSize; 1823 1761 } 1824 1762 } 1825 - RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, ""); 1763 + RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, ""); 1826 1764 1827 1765 /* finish queue */ 1828 1766 seqNb -= seqAdvance; 1829 1767 for ( ; seqNb<nbSeq ; seqNb++) { 1830 1768 seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]); 1831 - if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) 1832 - { 1769 + if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) { 1833 1770 const size_t leftoverLit = dctx->litBufferEnd - litPtr; 1834 - if (leftoverLit) 1835 - { 1771 + if (leftoverLit) { 1836 1772 RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer"); 1837 1773 ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); 1838 1774 sequence->litLength -= leftoverLit; ··· 1839 1779 litPtr = dctx->litExtraBuffer; 1840 1780 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; 1841 1781 
dctx->litBufferLocation = ZSTD_not_in_dst; 1842 - { 1843 - size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); 1782 + { size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); 1844 1783 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) 1845 1784 assert(!ZSTD_isError(oneSeqSize)); 1846 - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); 1785 + ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); 1847 1786 #endif 1848 1787 if (ZSTD_isError(oneSeqSize)) return oneSeqSize; 1849 1788 op += oneSeqSize; ··· 1855 1796 ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); 1856 1797 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) 1857 1798 assert(!ZSTD_isError(oneSeqSize)); 1858 - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); 1799 + ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); 1859 1800 #endif 1860 1801 if (ZSTD_isError(oneSeqSize)) return oneSeqSize; 1861 1802 op += oneSeqSize; ··· 1867 1808 } 1868 1809 1869 1810 /* last literal segment */ 1870 - if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */ 1871 - { 1811 + if (dctx->litBufferLocation == ZSTD_split) { /* first deplete literal buffer in dst, then copy litExtraBuffer */ 1872 1812 size_t const lastLLSize = litBufferEnd - litPtr; 1873 1813 RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, ""); 1874 1814 if (op != NULL) { ··· 1885 1827 } 1886 1828 } 1887 1829 1888 - return op-ostart; 1830 + return (size_t)(op - ostart); 1889 1831 } 1890 1832 1891 1833 
static size_t 1892 1834 ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, 1893 1835 void* dst, size_t maxDstSize, 1894 1836 const void* seqStart, size_t seqSize, int nbSeq, 1895 - const ZSTD_longOffset_e isLongOffset, 1896 - const int frame) 1837 + const ZSTD_longOffset_e isLongOffset) 1897 1838 { 1898 - return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); 1839 + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); 1899 1840 } 1900 1841 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ 1901 1842 ··· 1908 1851 ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, 1909 1852 void* dst, size_t maxDstSize, 1910 1853 const void* seqStart, size_t seqSize, int nbSeq, 1911 - const ZSTD_longOffset_e isLongOffset, 1912 - const int frame) 1854 + const ZSTD_longOffset_e isLongOffset) 1913 1855 { 1914 - return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); 1856 + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); 1915 1857 } 1916 1858 static BMI2_TARGET_ATTRIBUTE size_t 1917 1859 DONT_VECTORIZE 1918 1860 ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx, 1919 1861 void* dst, size_t maxDstSize, 1920 1862 const void* seqStart, size_t seqSize, int nbSeq, 1921 - const ZSTD_longOffset_e isLongOffset, 1922 - const int frame) 1863 + const ZSTD_longOffset_e isLongOffset) 1923 1864 { 1924 - return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); 1865 + return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); 1925 1866 } 1926 1867 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ 1927 1868 ··· 1928 1873 ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, 1929 1874 void* dst, size_t maxDstSize, 1930 1875 const void* seqStart, size_t seqSize, int 
nbSeq, 1931 - const ZSTD_longOffset_e isLongOffset, 1932 - const int frame) 1876 + const ZSTD_longOffset_e isLongOffset) 1933 1877 { 1934 - return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); 1878 + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); 1935 1879 } 1936 1880 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ 1937 1881 1938 1882 #endif /* DYNAMIC_BMI2 */ 1939 1883 1940 - typedef size_t (*ZSTD_decompressSequences_t)( 1941 - ZSTD_DCtx* dctx, 1942 - void* dst, size_t maxDstSize, 1943 - const void* seqStart, size_t seqSize, int nbSeq, 1944 - const ZSTD_longOffset_e isLongOffset, 1945 - const int frame); 1946 - 1947 1884 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG 1948 1885 static size_t 1949 1886 ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, 1950 1887 const void* seqStart, size_t seqSize, int nbSeq, 1951 - const ZSTD_longOffset_e isLongOffset, 1952 - const int frame) 1888 + const ZSTD_longOffset_e isLongOffset) 1953 1889 { 1954 1890 DEBUGLOG(5, "ZSTD_decompressSequences"); 1955 1891 #if DYNAMIC_BMI2 1956 1892 if (ZSTD_DCtx_get_bmi2(dctx)) { 1957 - return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); 1893 + return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); 1958 1894 } 1959 1895 #endif 1960 - return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); 1896 + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); 1961 1897 } 1962 1898 static size_t 1963 1899 ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, 1964 1900 const void* seqStart, size_t seqSize, int nbSeq, 1965 - const ZSTD_longOffset_e isLongOffset, 1966 - const int frame) 1901 + const ZSTD_longOffset_e isLongOffset) 1967 
1902 { 1968 1903 DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer"); 1969 1904 #if DYNAMIC_BMI2 1970 1905 if (ZSTD_DCtx_get_bmi2(dctx)) { 1971 - return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); 1906 + return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); 1972 1907 } 1973 1908 #endif 1974 - return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); 1909 + return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); 1975 1910 } 1976 1911 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ 1977 1912 ··· 1976 1931 ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, 1977 1932 void* dst, size_t maxDstSize, 1978 1933 const void* seqStart, size_t seqSize, int nbSeq, 1979 - const ZSTD_longOffset_e isLongOffset, 1980 - const int frame) 1934 + const ZSTD_longOffset_e isLongOffset) 1981 1935 { 1982 1936 DEBUGLOG(5, "ZSTD_decompressSequencesLong"); 1983 1937 #if DYNAMIC_BMI2 1984 1938 if (ZSTD_DCtx_get_bmi2(dctx)) { 1985 - return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); 1939 + return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); 1986 1940 } 1987 1941 #endif 1988 - return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); 1942 + return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); 1989 1943 } 1990 1944 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ 1991 1945 1992 1946 1947 + /* 1948 + * @returns The total size of the history referenceable by zstd, including 1949 + * both the prefix and the extDict. At @p op any offset larger than this 1950 + * is invalid. 
1951 + */ 1952 + static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart) 1953 + { 1954 + return (size_t)(op - virtualStart); 1955 + } 1993 1956 1994 - #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ 1995 - !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) 1996 - /* ZSTD_getLongOffsetsShare() : 1957 + typedef struct { 1958 + unsigned longOffsetShare; 1959 + unsigned maxNbAdditionalBits; 1960 + } ZSTD_OffsetInfo; 1961 + 1962 + /* ZSTD_getOffsetInfo() : 1997 1963 * condition : offTable must be valid 1998 1964 * @return : "share" of long offsets (arbitrarily defined as > (1<<23)) 1999 - * compared to maximum possible of (1<<OffFSELog) */ 2000 - static unsigned 2001 - ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable) 1965 + * compared to maximum possible of (1<<OffFSELog), 1966 + * as well as the maximum number additional bits required. 1967 + */ 1968 + static ZSTD_OffsetInfo 1969 + ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq) 2002 1970 { 2003 - const void* ptr = offTable; 2004 - U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog; 2005 - const ZSTD_seqSymbol* table = offTable + 1; 2006 - U32 const max = 1 << tableLog; 2007 - U32 u, total = 0; 2008 - DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog); 1971 + ZSTD_OffsetInfo info = {0, 0}; 1972 + /* If nbSeq == 0, then the offTable is uninitialized, but we have 1973 + * no sequences, so both values should be 0. 
1974 + */ 1975 + if (nbSeq != 0) { 1976 + const void* ptr = offTable; 1977 + U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog; 1978 + const ZSTD_seqSymbol* table = offTable + 1; 1979 + U32 const max = 1 << tableLog; 1980 + U32 u; 1981 + DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog); 2009 1982 2010 - assert(max <= (1 << OffFSELog)); /* max not too large */ 2011 - for (u=0; u<max; u++) { 2012 - if (table[u].nbAdditionalBits > 22) total += 1; 1983 + assert(max <= (1 << OffFSELog)); /* max not too large */ 1984 + for (u=0; u<max; u++) { 1985 + info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits); 1986 + if (table[u].nbAdditionalBits > 22) info.longOffsetShare += 1; 1987 + } 1988 + 1989 + assert(tableLog <= OffFSELog); 1990 + info.longOffsetShare <<= (OffFSELog - tableLog); /* scale to OffFSELog */ 2013 1991 } 2014 1992 2015 - assert(tableLog <= OffFSELog); 2016 - total <<= (OffFSELog - tableLog); /* scale to OffFSELog */ 2017 - 2018 - return total; 1993 + return info; 2019 1994 } 2020 - #endif 1995 + 1996 + /* 1997 + * @returns The maximum offset we can decode in one read of our bitstream, without 1998 + * reloading more bits in the middle of the offset bits read. Any offsets larger 1999 + * than this must use the long offset decoder. 2000 + */ 2001 + static size_t ZSTD_maxShortOffset(void) 2002 + { 2003 + if (MEM_64bits()) { 2004 + /* We can decode any offset without reloading bits. 2005 + * This might change if the max window size grows. 2006 + */ 2007 + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); 2008 + return (size_t)-1; 2009 + } else { 2010 + /* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1. 2011 + * This offBase would require STREAM_ACCUMULATOR_MIN extra bits. 2012 + * Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset. 
2013 + */ 2014 + size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1; 2015 + size_t const maxOffset = maxOffbase - ZSTD_REP_NUM; 2016 + assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN); 2017 + return maxOffset; 2018 + } 2019 + } 2021 2020 2022 2021 size_t 2023 2022 ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, 2024 2023 void* dst, size_t dstCapacity, 2025 - const void* src, size_t srcSize, const int frame, const streaming_operation streaming) 2024 + const void* src, size_t srcSize, const streaming_operation streaming) 2026 2025 { /* blockType == blockCompressed */ 2027 2026 const BYTE* ip = (const BYTE*)src; 2028 - /* isLongOffset must be true if there are long offsets. 2029 - * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN. 2030 - * We don't expect that to be the case in 64-bit mode. 2031 - * In block mode, window size is not known, so we have to be conservative. 2032 - * (note: but it could be evaluated from current-lowLimit) 2033 - */ 2034 - ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); 2035 - DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); 2027 + DEBUGLOG(5, "ZSTD_decompressBlock_internal (cSize : %u)", (unsigned)srcSize); 2036 2028 2037 - RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); 2029 + /* Note : the wording of the specification 2030 + * allows compressed block to be sized exactly ZSTD_blockSizeMax(dctx). 2031 + * This generally does not happen, as it makes little sense, 2032 + * since an uncompressed block would feature same size and have no decompression cost. 2033 + * Also, note that decoder from reference libzstd before < v1.5.4 2034 + * would consider this edge case as an error. 
2035 + * As a consequence, avoid generating compressed blocks of size ZSTD_blockSizeMax(dctx) 2036 + * for broader compatibility with the deployed ecosystem of zstd decoders */ 2037 + RETURN_ERROR_IF(srcSize > ZSTD_blockSizeMax(dctx), srcSize_wrong, ""); 2038 2038 2039 2039 /* Decode literals section */ 2040 2040 { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming); 2041 - DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize); 2041 + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize); 2042 2042 if (ZSTD_isError(litCSize)) return litCSize; 2043 2043 ip += litCSize; 2044 2044 srcSize -= litCSize; ··· 2091 2001 2092 2002 /* Build Decoding Tables */ 2093 2003 { 2004 + /* Compute the maximum block size, which must also work when !frame and fParams are unset. 2005 + * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t. 2006 + */ 2007 + size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx)); 2008 + size_t const totalHistorySize = ZSTD_totalHistorySize(ZSTD_maybeNullPtrAdd((BYTE*)dst, blockSizeMax), (BYTE const*)dctx->virtualStart); 2009 + /* isLongOffset must be true if there are long offsets. 2010 + * Offsets are long if they are larger than ZSTD_maxShortOffset(). 2011 + * We don't expect that to be the case in 64-bit mode. 2012 + * 2013 + * We check here to see if our history is large enough to allow long offsets. 2014 + * If it isn't, then we can't possible have (valid) long offsets. If the offset 2015 + * is invalid, then it is okay to read it incorrectly. 2016 + * 2017 + * If isLongOffsets is true, then we will later check our decoding table to see 2018 + * if it is even possible to generate long offsets. 
2019 + */ 2020 + ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset())); 2094 2021 /* These macros control at build-time which decompressor implementation 2095 2022 * we use. If neither is defined, we do some inspection and dispatch at 2096 2023 * runtime. ··· 2115 2008 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ 2116 2009 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) 2117 2010 int usePrefetchDecoder = dctx->ddictIsCold; 2011 + #else 2012 + /* Set to 1 to avoid computing offset info if we don't need to. 2013 + * Otherwise this value is ignored. 2014 + */ 2015 + int usePrefetchDecoder = 1; 2118 2016 #endif 2119 2017 int nbSeq; 2120 2018 size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); ··· 2127 2015 ip += seqHSize; 2128 2016 srcSize -= seqHSize; 2129 2017 2130 - RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); 2018 + RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); 2019 + RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall, 2020 + "invalid dst"); 2131 2021 2132 - #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ 2133 - !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) 2134 - if ( !usePrefetchDecoder 2135 - && (!frame || (dctx->fParams.windowSize > (1<<24))) 2136 - && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */ 2137 - U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr); 2138 - U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ 2139 - usePrefetchDecoder = (shareLongOffsets >= minShare); 2022 + /* If we could potentially have long offsets, or we might want to use the prefetch decoder, 2023 + * compute information about the share of long offsets, and the maximum nbAdditionalBits. 
2024 + * NOTE: could probably use a larger nbSeq limit 2025 + */ 2026 + if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) { 2027 + ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq); 2028 + if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) { 2029 + /* If isLongOffset, but the maximum number of additional bits that we see in our table is small 2030 + * enough, then we know it is impossible to have too long an offset in this block, so we can 2031 + * use the regular offset decoder. 2032 + */ 2033 + isLongOffset = ZSTD_lo_isRegularOffset; 2034 + } 2035 + if (!usePrefetchDecoder) { 2036 + U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ 2037 + usePrefetchDecoder = (info.longOffsetShare >= minShare); 2038 + } 2140 2039 } 2141 - #endif 2142 2040 2143 2041 dctx->ddictIsCold = 0; 2144 2042 2145 2043 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ 2146 2044 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) 2147 - if (usePrefetchDecoder) 2045 + if (usePrefetchDecoder) { 2046 + #else 2047 + (void)usePrefetchDecoder; 2048 + { 2148 2049 #endif 2149 2050 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT 2150 - return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); 2051 + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); 2151 2052 #endif 2053 + } 2152 2054 2153 2055 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG 2154 2056 /* else */ 2155 2057 if (dctx->litBufferLocation == ZSTD_split) 2156 - return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); 2058 + return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); 2157 2059 else 2158 - return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); 2060 + return 
ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); 2159 2061 #endif 2160 2062 } 2161 2063 } 2162 2064 2163 2065 2066 + ZSTD_ALLOW_POINTER_OVERFLOW_ATTR 2164 2067 void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize) 2165 2068 { 2166 2069 if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */ ··· 2187 2060 } 2188 2061 2189 2062 2063 + size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx, 2064 + void* dst, size_t dstCapacity, 2065 + const void* src, size_t srcSize) 2066 + { 2067 + size_t dSize; 2068 + dctx->isFrameDecompression = 0; 2069 + ZSTD_checkContinuity(dctx, dst, dstCapacity); 2070 + dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, not_streaming); 2071 + FORWARD_IF_ERROR(dSize, ""); 2072 + dctx->previousDstEnd = (char*)dst + dSize; 2073 + return dSize; 2074 + } 2075 + 2076 + 2077 + /* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */ 2190 2078 size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, 2191 2079 void* dst, size_t dstCapacity, 2192 2080 const void* src, size_t srcSize) 2193 2081 { 2194 - size_t dSize; 2195 - ZSTD_checkContinuity(dctx, dst, dstCapacity); 2196 - dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming); 2197 - dctx->previousDstEnd = (char*)dst + dSize; 2198 - return dSize; 2082 + return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize); 2199 2083 }
+8 -2
lib/zstd/decompress/zstd_decompress_block.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 48 47 */ 49 48 size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, 50 49 void* dst, size_t dstCapacity, 51 - const void* src, size_t srcSize, const int frame, const streaming_operation streaming); 50 + const void* src, size_t srcSize, const streaming_operation streaming); 52 51 53 52 /* ZSTD_buildFSETable() : 54 53 * generate FSE decoding table for one symbol (ll, ml or off) ··· 64 63 const U32* baseValue, const U8* nbAdditionalBits, 65 64 unsigned tableLog, void* wksp, size_t wkspSize, 66 65 int bmi2); 66 + 67 + /* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */ 68 + size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx, 69 + void* dst, size_t dstCapacity, 70 + const void* src, size_t srcSize); 67 71 68 72 69 73 #endif /* ZSTD_DEC_BLOCK_H */
+12 -7
lib/zstd/decompress/zstd_decompress_internal.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 1 2 /* 2 - * Copyright (c) Yann Collet, Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 3 4 * All rights reserved. 4 5 * 5 6 * This source code is licensed under both the BSD-style license (found in the ··· 76 75 77 76 #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) 78 77 #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32)) 78 + #define ZSTD_HUFFDTABLE_CAPACITY_LOG 12 79 79 80 80 typedef struct { 81 81 ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ 82 82 ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ 83 83 ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ 84 - HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ 84 + HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */ 85 85 U32 rep[ZSTD_REP_NUM]; 86 86 U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; 87 87 } ZSTD_entropyDTables_t; ··· 137 135 const void* virtualStart; /* virtual start of previous segment if it was just before current one */ 138 136 const void* dictEnd; /* end of previous segment */ 139 137 size_t expected; 140 - ZSTD_frameHeader fParams; 138 + ZSTD_FrameHeader fParams; 141 139 U64 processedCSize; 142 140 U64 decodedSize; 143 141 blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ ··· 154 152 size_t litSize; 155 153 size_t rleSize; 156 154 size_t staticSize; 157 - #if DYNAMIC_BMI2 != 0 155 + int isFrameDecompression; 156 + #if DYNAMIC_BMI2 158 157 int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. 
CPU support is determined dynamically once per context lifetime. */ 159 158 #endif 160 159 ··· 167 164 ZSTD_dictUses_e dictUses; 168 165 ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ 169 166 ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */ 167 + int disableHufAsm; 168 + int maxBlockSizeParam; 170 169 171 170 /* streaming */ 172 171 ZSTD_dStreamStage streamStage; ··· 204 199 }; /* typedef'd to ZSTD_DCtx within "zstd.h" */ 205 200 206 201 MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) { 207 - #if DYNAMIC_BMI2 != 0 208 - return dctx->bmi2; 202 + #if DYNAMIC_BMI2 203 + return dctx->bmi2; 209 204 #else 210 205 (void)dctx; 211 - return 0; 206 + return 0; 212 207 #endif 213 208 } 214 209
+1 -1
lib/zstd/decompress_sources.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ 2 2 /* 3 - * Copyright (c) Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 4 4 * All rights reserved. 5 5 * 6 6 * This source code is licensed under both the BSD-style license (found in the
+1 -4
lib/zstd/zstd_common_module.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 2 2 /* 3 - * Copyright (c) Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 4 4 * All rights reserved. 5 5 * 6 6 * This source code is licensed under both the BSD-style license (found in the ··· 24 24 EXPORT_SYMBOL_GPL(ZSTD_isError); 25 25 EXPORT_SYMBOL_GPL(ZSTD_getErrorName); 26 26 EXPORT_SYMBOL_GPL(ZSTD_getErrorCode); 27 - EXPORT_SYMBOL_GPL(ZSTD_customMalloc); 28 - EXPORT_SYMBOL_GPL(ZSTD_customCalloc); 29 - EXPORT_SYMBOL_GPL(ZSTD_customFree); 30 27 31 28 MODULE_LICENSE("Dual BSD/GPL"); 32 29 MODULE_DESCRIPTION("Zstd Common");
+74 -1
lib/zstd/zstd_compress_module.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 2 2 /* 3 - * Copyright (c) Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 4 4 * All rights reserved. 5 5 * 6 6 * This source code is licensed under both the BSD-style license (found in the ··· 16 16 17 17 #include "common/zstd_deps.h" 18 18 #include "common/zstd_internal.h" 19 + #include "compress/zstd_compress_internal.h" 19 20 20 21 #define ZSTD_FORWARD_IF_ERR(ret) \ 21 22 do { \ ··· 93 92 } 94 93 EXPORT_SYMBOL(zstd_get_cparams); 95 94 95 + size_t zstd_cctx_set_param(zstd_cctx *cctx, ZSTD_cParameter param, int value) 96 + { 97 + return ZSTD_CCtx_setParameter(cctx, param, value); 98 + } 99 + EXPORT_SYMBOL(zstd_cctx_set_param); 100 + 96 101 size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams) 97 102 { 98 103 return ZSTD_estimateCCtxSize_usingCParams(*cparams); 99 104 } 100 105 EXPORT_SYMBOL(zstd_cctx_workspace_bound); 106 + 107 + // Used by zstd_cctx_workspace_bound_with_ext_seq_prod() 108 + static size_t dummy_external_sequence_producer( 109 + void *sequenceProducerState, 110 + ZSTD_Sequence *outSeqs, size_t outSeqsCapacity, 111 + const void *src, size_t srcSize, 112 + const void *dict, size_t dictSize, 113 + int compressionLevel, 114 + size_t windowSize) 115 + { 116 + (void)sequenceProducerState; 117 + (void)outSeqs; (void)outSeqsCapacity; 118 + (void)src; (void)srcSize; 119 + (void)dict; (void)dictSize; 120 + (void)compressionLevel; 121 + (void)windowSize; 122 + return ZSTD_SEQUENCE_PRODUCER_ERROR; 123 + } 124 + 125 + static void init_cctx_params_from_compress_params( 126 + ZSTD_CCtx_params *cctx_params, 127 + const zstd_compression_parameters *compress_params) 128 + { 129 + ZSTD_parameters zstd_params; 130 + memset(&zstd_params, 0, sizeof(zstd_params)); 131 + zstd_params.cParams = *compress_params; 132 + ZSTD_CCtxParams_init_advanced(cctx_params, zstd_params); 133 + } 134 + 135 + size_t zstd_cctx_workspace_bound_with_ext_seq_prod(const 
zstd_compression_parameters *compress_params) 136 + { 137 + ZSTD_CCtx_params cctx_params; 138 + init_cctx_params_from_compress_params(&cctx_params, compress_params); 139 + ZSTD_CCtxParams_registerSequenceProducer(&cctx_params, NULL, dummy_external_sequence_producer); 140 + return ZSTD_estimateCCtxSize_usingCCtxParams(&cctx_params); 141 + } 142 + EXPORT_SYMBOL(zstd_cctx_workspace_bound_with_ext_seq_prod); 143 + 144 + size_t zstd_cstream_workspace_bound_with_ext_seq_prod(const zstd_compression_parameters *compress_params) 145 + { 146 + ZSTD_CCtx_params cctx_params; 147 + init_cctx_params_from_compress_params(&cctx_params, compress_params); 148 + ZSTD_CCtxParams_registerSequenceProducer(&cctx_params, NULL, dummy_external_sequence_producer); 149 + return ZSTD_estimateCStreamSize_usingCCtxParams(&cctx_params); 150 + } 151 + EXPORT_SYMBOL(zstd_cstream_workspace_bound_with_ext_seq_prod); 101 152 102 153 zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size) 103 154 { ··· 261 208 return ZSTD_endStream(cstream, output); 262 209 } 263 210 EXPORT_SYMBOL(zstd_end_stream); 211 + 212 + void zstd_register_sequence_producer( 213 + zstd_cctx *cctx, 214 + void* sequence_producer_state, 215 + zstd_sequence_producer_f sequence_producer 216 + ) { 217 + ZSTD_registerSequenceProducer(cctx, sequence_producer_state, sequence_producer); 218 + } 219 + EXPORT_SYMBOL(zstd_register_sequence_producer); 220 + 221 + size_t zstd_compress_sequences_and_literals(zstd_cctx *cctx, void* dst, size_t dst_capacity, 222 + const zstd_sequence *in_seqs, size_t in_seqs_size, 223 + const void* literals, size_t lit_size, size_t lit_capacity, 224 + size_t decompressed_size) 225 + { 226 + return ZSTD_compressSequencesAndLiterals(cctx, dst, dst_capacity, in_seqs, 227 + in_seqs_size, literals, lit_size, 228 + lit_capacity, decompressed_size); 229 + } 230 + EXPORT_SYMBOL(zstd_compress_sequences_and_literals); 264 231 265 232 MODULE_LICENSE("Dual BSD/GPL"); 266 233 MODULE_DESCRIPTION("Zstd Compressor");
+2 -2
lib/zstd/zstd_decompress_module.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause 2 2 /* 3 - * Copyright (c) Facebook, Inc. 3 + * Copyright (c) Meta Platforms, Inc. and affiliates. 4 4 * All rights reserved. 5 5 * 6 6 * This source code is licensed under both the BSD-style license (found in the ··· 113 113 114 114 size_t zstd_reset_dstream(zstd_dstream *dstream) 115 115 { 116 - return ZSTD_resetDStream(dstream); 116 + return ZSTD_DCtx_reset(dstream, ZSTD_reset_session_only); 117 117 } 118 118 EXPORT_SYMBOL(zstd_reset_dstream); 119 119