Mirror of https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
zstd: import upstream v1.5.2
Updates the kernel's zstd library to v1.5.2, the latest zstd release. The upstream tag it is updated to is `v1.5.2-kernel`, which contains several cherry-picked commits on top of the v1.5.2 release that are required for the kernel update. I will create this tag once the PR is ready to merge; until then, reference the temporary upstream branch `v1.5.2-kernel-cherrypicks`.

I plan to submit this patch as part of the v6.2 merge window.

I've done basic build testing and testing on x86-64, i386, and aarch64. I'm merging these patches into my `zstd-next` branch, which is pulled into `linux-next` for further testing.

I've benchmarked BtrFS with zstd compression on an x86-64 machine and saw these results. Decompression speed is a small win across the board. The lower compression levels 1-4 see both compression speed and compression ratio wins. The higher compression levels see a small compression speed loss and about neutral ratio. I expect the lower compression levels to be used much more heavily than the high compression levels, so this should be a net win.

Level   CTime    DTime   Ratio
1      -2.95%   -1.1%   -0.7%
3      -3.5%    -1.2%   -0.5%
5      +3.7%    -1.0%   +0.0%
7      +3.2%    -0.9%   +0.0%
9      -4.3%    -0.8%   +0.1%

Signed-off-by: Nick Terrell <terrelln@fb.com>
commit 2aa14b1ab2
parent 4782c725c1
@@ -313,7 +313,16 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c
    U32 const regMask = sizeof(bitContainer)*8 - 1;
    /* if start > regMask, bitstream is corrupted, and result is undefined */
    assert(nbBits < BIT_MASK_SIZE);
    /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
     * than accessing memory. When bmi2 instruction is not present, we consider
     * such cpus old (pre-Haswell, 2013) and their performance is not of that
     * importance.
     */
#if defined(__x86_64__) || defined(_M_X86)
    return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
#else
    return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
#endif
}

MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)

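The comment in this hunk is the rationale for the new x86-64 path: masking with ((1 << nbBits) - 1) lets a BMI2-enabled compiler fold the extraction into a single BZHI instruction instead of loading a mask from the BIT_mask[] table. A minimal stand-alone sketch of the two equivalent forms (not from the patch; the small BIT_mask table below is a local stand-in, and it assumes nbBits stays below 64):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Local stand-in for the library's BIT_mask[] lookup table. */
    static const uint32_t BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF };

    /* Mask form: on x86-64 with BMI2 this typically compiles to SHRX + BZHI. */
    static size_t getMiddleBits_mask(size_t bitContainer, unsigned start, unsigned nbBits)
    {
        unsigned const regMask = sizeof(bitContainer) * 8 - 1;
        return (bitContainer >> (start & regMask)) & (((uint64_t)1 << nbBits) - 1);
    }

    /* Table form: the pre-1.5.2 default, which pays for a memory access. */
    static size_t getMiddleBits_table(size_t bitContainer, unsigned start, unsigned nbBits)
    {
        unsigned const regMask = sizeof(bitContainer) * 8 - 1;
        assert(nbBits < sizeof(BIT_mask) / sizeof(BIT_mask[0]));
        return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
    }

    int main(void)
    {
        size_t const v = 0xABCDu;
        /* Both forms extract bits [4, 4+8) of v, i.e. 0xBC. */
        printf("%zx %zx\n", getMiddleBits_mask(v, 4, 8), getMiddleBits_table(v, 4, 8));
        return 0;
    }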
@ -11,6 +11,8 @@
|
||||
#ifndef ZSTD_COMPILER_H
|
||||
#define ZSTD_COMPILER_H
|
||||
|
||||
#include "portability_macros.h"
|
||||
|
||||
/*-*******************************************************
|
||||
* Compiler specifics
|
||||
*********************************************************/
|
||||
@ -34,7 +36,7 @@
|
||||
|
||||
/*
|
||||
On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC).
|
||||
This explictly marks such functions as __cdecl so that the code will still compile
|
||||
This explicitly marks such functions as __cdecl so that the code will still compile
|
||||
if a CC other than __cdecl has been made the default.
|
||||
*/
|
||||
#define WIN_CDECL
|
||||
@@ -70,25 +72,13 @@


/* target attribute */
#ifndef __has_attribute
  #define __has_attribute(x) 0  /* Compatibility with non-clang compilers. */
#endif
#define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))

/* Enable runtime BMI2 dispatch based on the CPU.
 * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
/* Target attribute for BMI2 dynamic dispatch.
 * Enable lzcnt, bmi, and bmi2.
 * We test for bmi1 & bmi2. lzcnt is included in bmi1.
 */
#ifndef DYNAMIC_BMI2
  #if ((defined(__clang__) && __has_attribute(__target__)) \
      || (defined(__GNUC__) \
          && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
      && (defined(__x86_64__) || defined(_M_X86)) \
      && !defined(__BMI2__)
  #  define DYNAMIC_BMI2 1
  #else
  #  define DYNAMIC_BMI2 0
  #endif
#endif
#define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2")

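This hunk swaps the bare TARGET_ATTRIBUTE("bmi2") for BMI2_TARGET_ATTRIBUTE, which also enables lzcnt and bmi, while the DYNAMIC_BMI2 detection itself moves to portability_macros.h (added later in this patch). A minimal sketch of the compile-one-function-twice-and-dispatch pattern this macro supports, assuming gcc or clang targeting x86-64 (function names are illustrative, not the library's):

    #include <stdio.h>

    #define MY_TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
    #define MY_BMI2_TARGET_ATTRIBUTE MY_TARGET_ATTRIBUTE("lzcnt,bmi,bmi2")

    /* Default build of the hot function, compiled without extra ISA extensions. */
    static int count_body_default(unsigned x) { return __builtin_popcount(x); }

    /* Same body, compiled with lzcnt/bmi/bmi2 enabled for this one function only. */
    MY_BMI2_TARGET_ATTRIBUTE
    static int count_body_bmi2(unsigned x) { return __builtin_popcount(x); }

    /* Runtime dispatch: the real library checks CPUID once and passes a bmi2 flag around. */
    static int count(unsigned x, int bmi2)
    {
        return bmi2 ? count_body_bmi2(x) : count_body_default(x);
    }

    int main(void)
    {
        int const bmi2 = __builtin_cpu_supports("bmi2");  /* gcc/clang runtime CPU check */
        printf("%d\n", count(0xF0F0u, bmi2));
        return 0;
    }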
/* prefetch
|
||||
* can be disabled, by declaring NO_PREFETCH build macro */
|
||||
@ -115,8 +105,9 @@
|
||||
}
|
||||
|
||||
/* vectorization
|
||||
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
|
||||
#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
|
||||
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
|
||||
* and some compilers, like Intel ICC and MCST LCC, do not support it at all. */
|
||||
#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) && !defined(__LCC__)
|
||||
# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
|
||||
# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
|
||||
# else
|
||||
@ -134,20 +125,18 @@
|
||||
#define LIKELY(x) (__builtin_expect((x), 1))
|
||||
#define UNLIKELY(x) (__builtin_expect((x), 0))
|
||||
|
||||
#if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
|
||||
# define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); }
|
||||
#else
|
||||
# define ZSTD_UNREACHABLE { assert(0); }
|
||||
#endif
|
||||
|
||||
/* disable warnings */
|
||||
|
||||
/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
|
||||
|
||||
|
||||
/* compat. with non-clang compilers */
|
||||
#ifndef __has_builtin
|
||||
# define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
/* compat. with non-clang compilers */
|
||||
#ifndef __has_feature
|
||||
# define __has_feature(x) 0
|
||||
#endif
|
||||
/* compile time determination of SIMD support */
|
||||
|
||||
/* C-language Attributes are added in C23. */
|
||||
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
|
||||
@ -168,10 +157,28 @@
|
||||
*/
|
||||
#define ZSTD_FALLTHROUGH fallthrough
|
||||
|
||||
/* detects whether we are being compiled under msan */
|
||||
/*-**************************************************************
|
||||
* Alignment check
|
||||
*****************************************************************/
|
||||
|
||||
/* this test was initially positioned in mem.h,
|
||||
* but this file is removed (or replaced) for linux kernel
|
||||
* so it's now hosted in compiler.h,
|
||||
* which remains valid for both user & kernel spaces.
|
||||
*/
|
||||
|
||||
#ifndef ZSTD_ALIGNOF
|
||||
/* covers gcc, clang & MSVC */
|
||||
/* note : this section must come first, before C11,
|
||||
* due to a limitation in the kernel source generator */
|
||||
# define ZSTD_ALIGNOF(T) __alignof(T)
|
||||
|
||||
#endif /* ZSTD_ALIGNOF */
|
||||
|
||||
/*-**************************************************************
|
||||
* Sanitizer
|
||||
*****************************************************************/
|
||||
|
||||
/* detects whether we are being compiled under asan */
|
||||
|
||||
|
||||
#endif /* ZSTD_COMPILER_H */
|
||||
|
@ -212,7 +212,7 @@ static size_t FSE_readNCount_body_default(
|
||||
}
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2(
|
||||
BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2(
|
||||
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
|
||||
const void* headerBuffer, size_t hbSize)
|
||||
{
|
||||
@ -240,6 +240,7 @@ size_t FSE_readNCount(
|
||||
return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
|
||||
}
|
||||
|
||||
|
||||
/*! HUF_readStats() :
|
||||
Read compact Huffman tree, saved by HUF_writeCTable().
|
||||
`huffWeight` is destination buffer.
|
||||
@ -293,7 +294,7 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
|
||||
weightTotal = 0;
|
||||
{ U32 n; for (n=0; n<oSize; n++) {
|
||||
if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
|
||||
if (huffWeight[n] > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
|
||||
rankStats[huffWeight[n]]++;
|
||||
weightTotal += (1 << huffWeight[n]) >> 1;
|
||||
} }
|
||||
@ -331,7 +332,7 @@ static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* r
|
||||
}
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
U32* nbSymbolsPtr, U32* tableLogPtr,
|
||||
const void* src, size_t srcSize,
|
||||
void* workSpace, size_t wkspSize)
|
||||
|
@ -18,8 +18,10 @@
|
||||
/* ****************************************
|
||||
* Dependencies
|
||||
******************************************/
|
||||
#include "zstd_deps.h" /* size_t */
|
||||
#include <linux/zstd_errors.h> /* enum list */
|
||||
#include "compiler.h"
|
||||
#include "debug.h"
|
||||
#include "zstd_deps.h" /* size_t */
|
||||
|
||||
|
||||
/* ****************************************
|
||||
@ -62,5 +64,82 @@ ERR_STATIC const char* ERR_getErrorName(size_t code)
|
||||
return ERR_getErrorString(ERR_getErrorCode(code));
|
||||
}
|
||||
|
||||
/*
|
||||
* Ignore: this is an internal helper.
|
||||
*
|
||||
* This is a helper function to help force C99-correctness during compilation.
|
||||
* Under strict compilation modes, variadic macro arguments can't be empty.
|
||||
* However, variadic function arguments can be. Using a function therefore lets
|
||||
* us statically check that at least one (string) argument was passed,
|
||||
* independent of the compilation flags.
|
||||
*/
|
||||
static INLINE_KEYWORD UNUSED_ATTR
|
||||
void _force_has_format_string(const char *format, ...) {
|
||||
(void)format;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ignore: this is an internal helper.
|
||||
*
|
||||
* We want to force this function invocation to be syntactically correct, but
|
||||
* we don't want to force runtime evaluation of its arguments.
|
||||
*/
|
||||
#define _FORCE_HAS_FORMAT_STRING(...) \
|
||||
if (0) { \
|
||||
_force_has_format_string(__VA_ARGS__); \
|
||||
}
|
||||
|
||||
#define ERR_QUOTE(str) #str
|
||||
|
||||
/*
|
||||
* Return the specified error if the condition evaluates to true.
|
||||
*
|
||||
* In debug modes, prints additional information.
|
||||
* In order to do that (particularly, printing the conditional that failed),
|
||||
* this can't just wrap RETURN_ERROR().
|
||||
*/
|
||||
#define RETURN_ERROR_IF(cond, err, ...) \
|
||||
if (cond) { \
|
||||
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
|
||||
__FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
|
||||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
|
||||
RAWLOG(3, ": " __VA_ARGS__); \
|
||||
RAWLOG(3, "\n"); \
|
||||
return ERROR(err); \
|
||||
}
|
||||
|
||||
/*
|
||||
* Unconditionally return the specified error.
|
||||
*
|
||||
* In debug modes, prints additional information.
|
||||
*/
|
||||
#define RETURN_ERROR(err, ...) \
|
||||
do { \
|
||||
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
|
||||
__FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
|
||||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
|
||||
RAWLOG(3, ": " __VA_ARGS__); \
|
||||
RAWLOG(3, "\n"); \
|
||||
return ERROR(err); \
|
||||
} while(0);
|
||||
|
||||
/*
|
||||
* If the provided expression evaluates to an error code, returns that error code.
|
||||
*
|
||||
* In debug modes, prints additional information.
|
||||
*/
|
||||
#define FORWARD_IF_ERROR(err, ...) \
|
||||
do { \
|
||||
size_t const err_code = (err); \
|
||||
if (ERR_isError(err_code)) { \
|
||||
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
|
||||
__FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
|
||||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
|
||||
RAWLOG(3, ": " __VA_ARGS__); \
|
||||
RAWLOG(3, "\n"); \
|
||||
return err_code; \
|
||||
} \
|
||||
} while(0);
|
||||
|
||||
|
||||
#endif /* ERROR_H_MODULE */
|
||||
|
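These macros encode zstd's convention of returning errors in the top range of a size_t. The sketch below shows how they are typically used inside the library; the ERROR/ERR_isError/RAWLOG definitions and the macro bodies here are deliberately simplified stand-ins, not the real implementations:

    #include <stddef.h>
    #include <stdio.h>

    /* Simplified stand-ins for the library's error plumbing (not the real code). */
    typedef enum { srcSize_wrong = 72, dstSize_tooSmall = 70 } myErr;
    #define ERROR(name)        ((size_t)-(ptrdiff_t)(name))
    #define ERR_isError(code)  ((code) >= ERROR(120))   /* errors occupy the last values of size_t */
    #define RAWLOG(l, ...)     fprintf(stderr, __VA_ARGS__)

    #define RETURN_ERROR_IF(cond, err, msg) \
        if (cond) { RAWLOG(3, "%s:%d: check failed: %s\n", __FILE__, __LINE__, msg); return ERROR(err); }

    #define FORWARD_IF_ERROR(expr) \
        do { size_t const err_code = (expr); if (ERR_isError(err_code)) return err_code; } while (0)

    static size_t copy_block(char* dst, size_t dstCapacity, const char* src, size_t srcSize)
    {
        RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, "output buffer too small");
        for (size_t i = 0; i < srcSize; i++) dst[i] = src[i];
        return srcSize;  /* success: number of bytes written */
    }

    static size_t copy_twice(char* dst, size_t dstCapacity, const char* src, size_t srcSize)
    {
        FORWARD_IF_ERROR(copy_block(dst, dstCapacity, src, srcSize));  /* bubble errors up */
        FORWARD_IF_ERROR(copy_block(dst + srcSize, dstCapacity - srcSize, src, srcSize));
        return 2 * srcSize;
    }

    int main(void)
    {
        char out[16];
        size_t const r = copy_twice(out, sizeof(out), "abcd", 4);
        printf("%s\n", ERR_isError(r) ? "error" : "ok");
        return 0;
    }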
@ -333,8 +333,9 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
|
||||
/* FSE_buildCTable_wksp() :
|
||||
* Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
|
||||
* `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
|
||||
* See FSE_buildCTable_wksp() for breakdown of workspace usage.
|
||||
*/
|
||||
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2)))
|
||||
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */)
|
||||
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
|
||||
size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
|
||||
|
||||
|
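The enlarged bound appears to account for the reworked FSE_buildCTable_wksp() layout shown later in this patch: a U16 cumul[] array, a byte-sized tableSymbol[] array, and a spread buffer that the new symbol-spreading loop may overwrite by up to 8 bytes. A quick check of the old and new bounds, using illustrative parameters (maxSymbolValue = 255, tableLog = 12):

    #include <stdint.h>
    #include <stdio.h>

    /* Old and new bounds, transcribed from the macro definitions above. */
    static uint64_t wksp_u32_old(uint64_t maxSymbolValue, uint64_t tableLog)
    {   return maxSymbolValue + 2 + (1ull << (tableLog - 2)); }

    static uint64_t wksp_u32_new(uint64_t maxSymbolValue, uint64_t tableLog)
    {   return ((maxSymbolValue + 2) + (1ull << tableLog)) / 2 + sizeof(uint64_t)/sizeof(uint32_t); }

    int main(void)
    {
        uint64_t const maxSymbolValue = 255, tableLog = 12;   /* illustrative parameters only */
        printf("old: %llu U32 (%llu bytes)\n",
               (unsigned long long)wksp_u32_old(maxSymbolValue, tableLog),
               (unsigned long long)(4 * wksp_u32_old(maxSymbolValue, tableLog)));
        printf("new: %llu U32 (%llu bytes)\n",
               (unsigned long long)wksp_u32_new(maxSymbolValue, tableLog),
               (unsigned long long)(4 * wksp_u32_new(maxSymbolValue, tableLog)));
        return 0;   /* prints old: 1281 U32, new: 2178 U32 */
    }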
@ -365,7 +365,7 @@ static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, co
|
||||
}
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
|
||||
BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
|
||||
{
|
||||
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
|
||||
}
|
||||
|
@ -86,9 +86,9 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
|
||||
|
||||
/* HUF_compress4X_wksp() :
|
||||
* Same as HUF_compress2(), but uses externally allocated `workSpace`.
|
||||
* `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
|
||||
#define HUF_WORKSPACE_SIZE ((6 << 10) + 256)
|
||||
#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
|
||||
* `workspace` must be at least as large as HUF_WORKSPACE_SIZE */
|
||||
#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
|
||||
#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
|
||||
HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned tableLog,
|
||||
@ -113,11 +113,11 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
|
||||
|
||||
|
||||
/* *** Constants *** */
|
||||
#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
|
||||
#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
|
||||
#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */
|
||||
#define HUF_SYMBOLVALUE_MAX 255
|
||||
|
||||
#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
|
||||
#define HUF_TABLELOG_ABSOLUTEMAX 12 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
|
||||
#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
|
||||
# error "HUF_TABLELOG_MAX is too large !"
|
||||
#endif
|
||||
@ -133,15 +133,11 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
|
||||
|
||||
/* static allocation of HUF's Compression Table */
|
||||
/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
|
||||
struct HUF_CElt_s {
|
||||
U16 val;
|
||||
BYTE nbBits;
|
||||
}; /* typedef'd to HUF_CElt */
|
||||
typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */
|
||||
#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */
|
||||
#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
|
||||
typedef size_t HUF_CElt; /* consider it an incomplete type */
|
||||
#define HUF_CTABLE_SIZE_ST(maxSymbolValue) ((maxSymbolValue)+2) /* Use tables of size_t, for proper alignment */
|
||||
#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_ST(maxSymbolValue) * sizeof(size_t))
|
||||
#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
|
||||
HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */
|
||||
HUF_CElt name[HUF_CTABLE_SIZE_ST(maxSymbolValue)] /* no final ; */
|
||||
|
||||
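HUF_CElt changes from a two-field struct to a plain size_t: the low byte holds nbBits and the remaining bits hold the code value, left-aligned so it can be ORed straight into a bitstream container (see HUF_setNbBits()/HUF_setValue() in huf_compress.c later in this diff). A minimal sketch of that packed layout, stand-alone and not the library's code:

    #include <assert.h>
    #include <stddef.h>
    #include <stdio.h>

    typedef size_t HUF_CElt_sketch;   /* low 8 bits = nbBits, top bits = left-aligned code value */

    static void set_elt(HUF_CElt_sketch* elt, size_t nbBits, size_t value)
    {
        assert(nbBits <= 12 && (value >> nbBits) == 0);
        *elt = nbBits;                                         /* like HUF_setNbBits */
        if (nbBits > 0)
            *elt |= value << (sizeof(*elt) * 8 - nbBits);      /* like HUF_setValue */
    }

    static size_t get_nbBits(HUF_CElt_sketch elt) { return elt & 0xFF; }

    static size_t get_value(HUF_CElt_sketch elt)
    {
        size_t const nbBits = get_nbBits(elt);
        return nbBits ? (elt & ~(size_t)0xFF) >> (sizeof(elt) * 8 - nbBits) : 0;
    }

    int main(void)
    {
        HUF_CElt_sketch elt;
        set_elt(&elt, 5, 0x13);   /* a 5-bit code with value 0b10011 */
        printf("nbBits=%zu value=%zx\n", get_nbBits(elt), get_value(elt));   /* 5, 13 */
        return 0;
    }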
/* static allocation of HUF's DTable */
|
||||
typedef U32 HUF_DTable;
|
||||
@ -191,6 +187,7 @@ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSym
|
||||
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
|
||||
size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
|
||||
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
|
||||
size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
|
||||
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
|
||||
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
|
||||
|
||||
@ -203,12 +200,13 @@ typedef enum {
|
||||
* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
|
||||
* If it uses hufTable it does not modify hufTable or repeat.
|
||||
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
|
||||
* If preferRepeat then the old table will always be used if valid. */
|
||||
* If preferRepeat then the old table will always be used if valid.
|
||||
* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
|
||||
size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned tableLog,
|
||||
void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
|
||||
|
||||
/* HUF_buildCTable_wksp() :
|
||||
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
|
||||
@ -246,11 +244,10 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
|
||||
* Loading a CTable saved with HUF_writeCTable() */
|
||||
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
|
||||
|
||||
/* HUF_getNbBits() :
|
||||
/* HUF_getNbBitsFromCTable() :
|
||||
* Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
|
||||
* Note 1 : is not inlined, as HUF_CElt definition is private
|
||||
* Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */
|
||||
U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue);
|
||||
* Note 1 : is not inlined, as HUF_CElt definition is private */
|
||||
U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue);
|
||||
|
||||
/*
|
||||
* HUF_decompress() does the following:
|
||||
@ -302,18 +299,20 @@ size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* c
|
||||
/* ====================== */
|
||||
|
||||
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
|
||||
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
|
||||
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */
|
||||
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
|
||||
size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
|
||||
/* HUF_compress1X_repeat() :
|
||||
* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
|
||||
* If it uses hufTable it does not modify hufTable or repeat.
|
||||
* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
|
||||
* If preferRepeat then the old table will always be used if valid. */
|
||||
* If preferRepeat then the old table will always be used if valid.
|
||||
* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
|
||||
size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned tableLog,
|
||||
void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
|
||||
|
||||
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
@ -351,6 +350,9 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
|
||||
#endif
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
|
||||
#endif
|
||||
|
||||
#endif /* HUF_STATIC_LINKING_ONLY */
|
||||
|
||||
|
@ -30,6 +30,8 @@
|
||||
* Basic Types
|
||||
*****************************************************************/
|
||||
typedef uint8_t BYTE;
|
||||
typedef uint8_t U8;
|
||||
typedef int8_t S8;
|
||||
typedef uint16_t U16;
|
||||
typedef int16_t S16;
|
||||
typedef uint32_t U32;
|
||||
|
lib/zstd/common/portability_macros.h (new file, 93 lines)
@@ -0,0 +1,93 @@
|
||||
/*
|
||||
* Copyright (c) Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#ifndef ZSTD_PORTABILITY_MACROS_H
|
||||
#define ZSTD_PORTABILITY_MACROS_H
|
||||
|
||||
/*
|
||||
* This header file contains macro defintions to support portability.
|
||||
* This header is shared between C and ASM code, so it MUST only
|
||||
* contain macro definitions. It MUST not contain any C code.
|
||||
*
|
||||
* This header ONLY defines macros to detect platforms/feature support.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/* compat. with non-clang compilers */
|
||||
#ifndef __has_attribute
|
||||
#define __has_attribute(x) 0
|
||||
#endif
|
||||
|
||||
/* compat. with non-clang compilers */
|
||||
#ifndef __has_builtin
|
||||
# define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
/* compat. with non-clang compilers */
|
||||
#ifndef __has_feature
|
||||
# define __has_feature(x) 0
|
||||
#endif
|
||||
|
||||
/* detects whether we are being compiled under msan */
|
||||
|
||||
/* detects whether we are being compiled under asan */
|
||||
|
||||
/* detects whether we are being compiled under dfsan */
|
||||
|
||||
/* Mark the internal assembly functions as hidden */
|
||||
#ifdef __ELF__
|
||||
# define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func
|
||||
#else
|
||||
# define ZSTD_HIDE_ASM_FUNCTION(func)
|
||||
#endif
|
||||
|
||||
/* Enable runtime BMI2 dispatch based on the CPU.
|
||||
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
|
||||
*/
|
||||
#ifndef DYNAMIC_BMI2
|
||||
#if ((defined(__clang__) && __has_attribute(__target__)) \
|
||||
|| (defined(__GNUC__) \
|
||||
&& (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
|
||||
&& (defined(__x86_64__) || defined(_M_X64)) \
|
||||
&& !defined(__BMI2__)
|
||||
# define DYNAMIC_BMI2 1
|
||||
#else
|
||||
# define DYNAMIC_BMI2 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Only enable assembly for GNUC comptabile compilers,
|
||||
* because other platforms may not support GAS assembly syntax.
|
||||
*
|
||||
* Only enable assembly for Linux / MacOS, other platforms may
|
||||
* work, but they haven't been tested. This could likely be
|
||||
* extended to BSD systems.
|
||||
*
|
||||
* Disable assembly when MSAN is enabled, because MSAN requires
|
||||
* 100% of code to be instrumented to work.
|
||||
*/
|
||||
#define ZSTD_ASM_SUPPORTED 1
|
||||
|
||||
/*
|
||||
* Determines whether we should enable assembly for x86-64
|
||||
* with BMI2.
|
||||
*
|
||||
* Enable if all of the following conditions hold:
|
||||
* - ASM hasn't been explicitly disabled by defining ZSTD_DISABLE_ASM
|
||||
* - Assembly is supported
|
||||
* - We are compiling for x86-64 and either:
|
||||
* - DYNAMIC_BMI2 is enabled
|
||||
* - BMI2 is supported at compile time
|
||||
*/
|
||||
#define ZSTD_ENABLE_ASM_X86_64_BMI2 0
|
||||
|
||||
#endif /* ZSTD_PORTABILITY_MACROS_H */
|
@ -20,6 +20,7 @@
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#include "compiler.h"
|
||||
#include "cpu.h"
|
||||
#include "mem.h"
|
||||
#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
|
||||
#include "error_private.h"
|
||||
@ -47,81 +48,7 @@
|
||||
#undef MAX
|
||||
#define MIN(a,b) ((a)<(b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a)>(b) ? (a) : (b))
|
||||
|
||||
/*
|
||||
* Ignore: this is an internal helper.
|
||||
*
|
||||
* This is a helper function to help force C99-correctness during compilation.
|
||||
* Under strict compilation modes, variadic macro arguments can't be empty.
|
||||
* However, variadic function arguments can be. Using a function therefore lets
|
||||
* us statically check that at least one (string) argument was passed,
|
||||
* independent of the compilation flags.
|
||||
*/
|
||||
static INLINE_KEYWORD UNUSED_ATTR
|
||||
void _force_has_format_string(const char *format, ...) {
|
||||
(void)format;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ignore: this is an internal helper.
|
||||
*
|
||||
* We want to force this function invocation to be syntactically correct, but
|
||||
* we don't want to force runtime evaluation of its arguments.
|
||||
*/
|
||||
#define _FORCE_HAS_FORMAT_STRING(...) \
|
||||
if (0) { \
|
||||
_force_has_format_string(__VA_ARGS__); \
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the specified error if the condition evaluates to true.
|
||||
*
|
||||
* In debug modes, prints additional information.
|
||||
* In order to do that (particularly, printing the conditional that failed),
|
||||
* this can't just wrap RETURN_ERROR().
|
||||
*/
|
||||
#define RETURN_ERROR_IF(cond, err, ...) \
|
||||
if (cond) { \
|
||||
RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
|
||||
__FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
|
||||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
|
||||
RAWLOG(3, ": " __VA_ARGS__); \
|
||||
RAWLOG(3, "\n"); \
|
||||
return ERROR(err); \
|
||||
}
|
||||
|
||||
/*
|
||||
* Unconditionally return the specified error.
|
||||
*
|
||||
* In debug modes, prints additional information.
|
||||
*/
|
||||
#define RETURN_ERROR(err, ...) \
|
||||
do { \
|
||||
RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
|
||||
__FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
|
||||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
|
||||
RAWLOG(3, ": " __VA_ARGS__); \
|
||||
RAWLOG(3, "\n"); \
|
||||
return ERROR(err); \
|
||||
} while(0);
|
||||
|
||||
/*
|
||||
* If the provided expression evaluates to an error code, returns that error code.
|
||||
*
|
||||
* In debug modes, prints additional information.
|
||||
*/
|
||||
#define FORWARD_IF_ERROR(err, ...) \
|
||||
do { \
|
||||
size_t const err_code = (err); \
|
||||
if (ERR_isError(err_code)) { \
|
||||
RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
|
||||
__FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
|
||||
_FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
|
||||
RAWLOG(3, ": " __VA_ARGS__); \
|
||||
RAWLOG(3, "\n"); \
|
||||
return err_code; \
|
||||
} \
|
||||
} while(0);
|
||||
#define BOUNDED(min,val,max) (MAX(min,MIN(val,max)))
|
||||
|
||||
|
||||
/*-*************************************
|
||||
@ -130,7 +57,6 @@ void _force_has_format_string(const char *format, ...) {
|
||||
#define ZSTD_OPT_NUM (1<<12)
|
||||
|
||||
#define ZSTD_REP_NUM 3 /* number of repcodes */
|
||||
#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
|
||||
static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
|
||||
|
||||
#define KB *(1 <<10)
|
||||
@ -182,7 +108,7 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
|
||||
/* Each table cannot take more than #symbols * FSELog bits */
|
||||
#define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)
|
||||
|
||||
static UNUSED_ATTR const U32 LL_bits[MaxLL+1] = {
|
||||
static UNUSED_ATTR const U8 LL_bits[MaxLL+1] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 2, 2, 3, 3,
|
||||
@ -199,7 +125,7 @@ static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {
|
||||
#define LL_DEFAULTNORMLOG 6 /* for static allocation */
|
||||
static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
|
||||
|
||||
static UNUSED_ATTR const U32 ML_bits[MaxML+1] = {
|
||||
static UNUSED_ATTR const U8 ML_bits[MaxML+1] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
@ -234,12 +160,31 @@ static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
|
||||
* Shared functions to include for inlining
|
||||
*********************************************/
|
||||
static void ZSTD_copy8(void* dst, const void* src) {
|
||||
#if defined(ZSTD_ARCH_ARM_NEON)
|
||||
vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
|
||||
#else
|
||||
ZSTD_memcpy(dst, src, 8);
|
||||
#endif
|
||||
}
|
||||
|
||||
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
|
||||
|
||||
/* Need to use memmove here since the literal buffer can now be located within
|
||||
the dst buffer. In circumstances where the op "catches up" to where the
|
||||
literal buffer is, there can be partial overlaps in this call on the final
|
||||
copy if the literal is being shifted by less than 16 bytes. */
|
||||
static void ZSTD_copy16(void* dst, const void* src) {
|
||||
ZSTD_memcpy(dst, src, 16);
|
||||
#if defined(ZSTD_ARCH_ARM_NEON)
|
||||
vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
|
||||
#elif defined(ZSTD_ARCH_X86_SSE2)
|
||||
_mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
|
||||
#elif defined(__clang__)
|
||||
ZSTD_memmove(dst, src, 16);
|
||||
#else
|
||||
/* ZSTD_memmove is not inlined properly by gcc */
|
||||
BYTE copy16_buf[16];
|
||||
ZSTD_memcpy(copy16_buf, src, 16);
|
||||
ZSTD_memcpy(dst, copy16_buf, 16);
|
||||
#endif
|
||||
}
|
||||
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
|
||||
|
||||
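ZSTD_copy16() now prefers explicit NEON/SSE2 16-byte loads and stores, and the new comment explains why the scalar fallback bounces through a stack buffer instead of calling ZSTD_memmove directly. COPY16 feeds the wildcopy loop, which deliberately rounds the copy up to whole 16-byte chunks and may write past the requested length. A simplified sketch of that over-copying loop (memcpy stands in for ZSTD_copy16, and the buffer sizes are illustrative):

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    #define COPY16_SKETCH(d, s) { memcpy(d, s, 16); d += 16; s += 16; }

    /* Copies `length` bytes but may write up to 15 bytes past dst+length,
     * so the destination buffer must reserve that much slack. */
    static void wildcopy_sketch(void* dst, const void* src, ptrdiff_t length)
    {
        unsigned char* d = (unsigned char*)dst;
        const unsigned char* s = (const unsigned char*)src;
        unsigned char* const end = d + length;
        do { COPY16_SKETCH(d, s); } while (d < end);
    }

    int main(void)
    {
        char src[48] = "wildcopy rounds the copy up to 16 bytes";
        char dst[48 + 16] = {0};          /* slack for the overrun */
        wildcopy_sketch(dst, src, 40);
        printf("%s\n", dst);
        return 0;
    }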
@ -267,8 +212,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
|
||||
BYTE* op = (BYTE*)dst;
|
||||
BYTE* const oend = op + length;
|
||||
|
||||
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
|
||||
|
||||
if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
|
||||
/* Handle short offset copies. */
|
||||
do {
|
||||
@ -331,11 +274,18 @@ typedef enum {
|
||||
* Private declarations
|
||||
*********************************************/
|
||||
typedef struct seqDef_s {
|
||||
U32 offset; /* Offset code of the sequence */
|
||||
U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */
|
||||
U16 litLength;
|
||||
U16 matchLength;
|
||||
U16 mlBase; /* mlBase == matchLength - MINMATCH */
|
||||
} seqDef;
|
||||
|
||||
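The renamed seqDef fields make their encodings explicit, as the new comments state: offBase is either a repcode index (1 to 3) or the real offset plus ZSTD_REP_NUM, and mlBase is matchLength minus MINMATCH. A small sketch of decoding those fields back into plain values (the helper names are hypothetical; MINMATCH is 3 in zstd):

    #include <stdio.h>

    #define REP_NUM   3   /* ZSTD_REP_NUM */
    #define MINMATCH  3

    typedef struct { unsigned offBase; unsigned litLength; unsigned mlBase; } seqDef_sketch;

    /* Returns 1 and writes the repcode index if offBase encodes a repcode, else 0. */
    static int seq_isRepcode(const seqDef_sketch* s, unsigned* repIdx)
    {
        if (s->offBase <= REP_NUM) { *repIdx = s->offBase; return 1; }
        return 0;
    }

    static unsigned seq_offset(const seqDef_sketch* s)      { return s->offBase - REP_NUM; }
    static unsigned seq_matchLength(const seqDef_sketch* s) { return s->mlBase + MINMATCH; }

    int main(void)
    {
        seqDef_sketch const seq = { 1000 + REP_NUM, 11, 42 - MINMATCH };
        unsigned repIdx;
        if (!seq_isRepcode(&seq, &repIdx))
            printf("offset=%u matchLength=%u\n", seq_offset(&seq), seq_matchLength(&seq)); /* 1000, 42 */
        return 0;
    }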
/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */
|
||||
typedef enum {
|
||||
ZSTD_llt_none = 0, /* no longLengthType */
|
||||
ZSTD_llt_literalLength = 1, /* represents a long literal */
|
||||
ZSTD_llt_matchLength = 2 /* represents a long match */
|
||||
} ZSTD_longLengthType_e;
|
||||
|
||||
typedef struct {
|
||||
seqDef* sequencesStart;
|
||||
seqDef* sequences; /* ptr to end of sequences */
|
||||
@ -347,12 +297,12 @@ typedef struct {
|
||||
size_t maxNbSeq;
|
||||
size_t maxNbLit;
|
||||
|
||||
/* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength
|
||||
/* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength
|
||||
* in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
|
||||
* the existing value of the litLength or matchLength by 0x10000.
|
||||
*/
|
||||
U32 longLengthID; /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */
|
||||
U32 longLengthPos; /* Index of the sequence to apply long length modification to */
|
||||
ZSTD_longLengthType_e longLengthType;
|
||||
U32 longLengthPos; /* Index of the sequence to apply long length modification to */
|
||||
} seqStore_t;
|
||||
|
||||
typedef struct {
|
||||
@ -362,18 +312,18 @@ typedef struct {
|
||||
|
||||
/*
|
||||
* Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
|
||||
* indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength.
|
||||
* indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
|
||||
*/
|
||||
MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
|
||||
{
|
||||
ZSTD_sequenceLength seqLen;
|
||||
seqLen.litLength = seq->litLength;
|
||||
seqLen.matchLength = seq->matchLength + MINMATCH;
|
||||
seqLen.matchLength = seq->mlBase + MINMATCH;
|
||||
if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
|
||||
if (seqStore->longLengthID == 1) {
|
||||
if (seqStore->longLengthType == ZSTD_llt_literalLength) {
|
||||
seqLen.litLength += 0xFFFF;
|
||||
}
|
||||
if (seqStore->longLengthID == 2) {
|
||||
if (seqStore->longLengthType == ZSTD_llt_matchLength) {
|
||||
seqLen.matchLength += 0xFFFF;
|
||||
}
|
||||
}
|
||||
@ -419,6 +369,41 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Counts the number of trailing zeros of a `size_t`.
|
||||
* Most compilers should support CTZ as a builtin. A backup
|
||||
* implementation is provided if the builtin isn't supported, but
|
||||
* it may not be terribly efficient.
|
||||
*/
|
||||
MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val)
|
||||
{
|
||||
if (MEM_64bits()) {
|
||||
# if (__GNUC__ >= 4)
|
||||
return __builtin_ctzll((U64)val);
|
||||
# else
|
||||
static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19,
|
||||
4, 25, 14, 28, 9, 34, 20, 56,
|
||||
5, 17, 26, 54, 15, 41, 29, 43,
|
||||
10, 31, 38, 35, 21, 45, 49, 57,
|
||||
63, 6, 12, 18, 24, 27, 33, 55,
|
||||
16, 53, 40, 42, 30, 37, 44, 48,
|
||||
62, 11, 23, 32, 52, 39, 36, 47,
|
||||
61, 22, 51, 46, 60, 50, 59, 58 };
|
||||
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
|
||||
# endif
|
||||
} else { /* 32 bits */
|
||||
# if (__GNUC__ >= 3)
|
||||
return __builtin_ctz((U32)val);
|
||||
# else
|
||||
static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3,
|
||||
30, 22, 20, 15, 25, 17, 4, 8,
|
||||
31, 27, 13, 23, 21, 19, 16, 7,
|
||||
26, 12, 18, 6, 11, 5, 10, 9 };
|
||||
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
|
||||
# endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
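ZSTD_countTrailingZeros() uses the compiler builtin when available and otherwise falls back to a De Bruijn multiply-and-lookup. The stand-alone harness below, which is not part of the patch, checks the 32-bit fallback against a naive bit loop:

    #include <stdint.h>
    #include <stdio.h>

    /* 32-bit De Bruijn fallback, as in the hunk above. */
    static unsigned ctz32_debruijn(uint32_t val)
    {
        static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3,
                                                 30, 22, 20, 15, 25, 17, 4, 8,
                                                 31, 27, 13, 23, 21, 19, 16, 7,
                                                 26, 12, 18, 6, 11, 5, 10, 9 };
        return DeBruijnBytePos[((uint32_t)((val & -(int32_t)val) * 0x077CB531U)) >> 27];
    }

    /* Reference implementation: count low zero bits one at a time. */
    static unsigned ctz32_naive(uint32_t val)
    {
        unsigned n = 0;
        while ((val & 1) == 0) { val >>= 1; n++; }
        return n;
    }

    int main(void)
    {
        uint32_t v;
        for (v = 1; v < (1u << 20); v++)
            if (ctz32_debruijn(v) != ctz32_naive(v)) { printf("mismatch at %u\n", v); return 1; }
        printf("ok\n");
        return 0;
    }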
/* ZSTD_invalidateRepCodes() :
|
||||
* ensures next compression will not use repcodes from previous block.
|
||||
@ -445,6 +430,14 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
||||
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
const void* src, size_t srcSize);
|
||||
|
||||
/*
|
||||
* @returns true iff the CPU supports dynamic BMI2 dispatch.
|
||||
*/
|
||||
MEM_STATIC int ZSTD_cpuSupportsBmi2(void)
|
||||
{
|
||||
ZSTD_cpuid_t cpuid = ZSTD_cpuid();
|
||||
return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
|
||||
}
|
||||
|
||||
|
||||
#endif /* ZSTD_CCOMMON_H_MODULE */
|
||||
|
lib/zstd/compress/clevels.h (new file, 132 lines)
@@ -0,0 +1,132 @@
|
||||
/*
|
||||
* Copyright (c) Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#ifndef ZSTD_CLEVELS_H
|
||||
#define ZSTD_CLEVELS_H
|
||||
|
||||
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */
|
||||
#include <linux/zstd.h>
|
||||
|
||||
/*-===== Pre-defined compression levels =====-*/
|
||||
|
||||
#define ZSTD_MAX_CLEVEL 22
|
||||
|
||||
__attribute__((__unused__))
|
||||
|
||||
static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
|
||||
{ /* "default" - for any srcSize > 256 KB */
|
||||
/* W, C, H, S, L, TL, strat */
|
||||
{ 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
|
||||
{ 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */
|
||||
{ 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */
|
||||
{ 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */
|
||||
{ 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */
|
||||
{ 21, 18, 19, 3, 5, 2, ZSTD_greedy }, /* level 5 */
|
||||
{ 21, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6 */
|
||||
{ 21, 19, 20, 4, 5, 8, ZSTD_lazy }, /* level 7 */
|
||||
{ 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 8 */
|
||||
{ 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */
|
||||
{ 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 10 */
|
||||
{ 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 11 */
|
||||
{ 22, 22, 23, 6, 5, 32, ZSTD_lazy2 }, /* level 12 */
|
||||
{ 22, 22, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */
|
||||
{ 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */
|
||||
{ 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */
|
||||
{ 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */
|
||||
{ 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */
|
||||
{ 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */
|
||||
{ 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */
|
||||
{ 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */
|
||||
{ 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */
|
||||
{ 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */
|
||||
},
|
||||
{ /* for srcSize <= 256 KB */
|
||||
/* W, C, H, S, L, T, strat */
|
||||
{ 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
|
||||
{ 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */
|
||||
{ 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */
|
||||
{ 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */
|
||||
{ 18, 16, 17, 3, 5, 2, ZSTD_greedy }, /* level 4.*/
|
||||
{ 18, 17, 18, 5, 5, 2, ZSTD_greedy }, /* level 5.*/
|
||||
{ 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/
|
||||
{ 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */
|
||||
{ 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
|
||||
{ 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
|
||||
{ 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
|
||||
{ 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/
|
||||
{ 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/
|
||||
{ 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */
|
||||
{ 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
|
||||
{ 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/
|
||||
{ 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/
|
||||
{ 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/
|
||||
{ 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/
|
||||
{ 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
|
||||
{ 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/
|
||||
{ 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/
|
||||
{ 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/
|
||||
},
|
||||
{ /* for srcSize <= 128 KB */
|
||||
/* W, C, H, S, L, T, strat */
|
||||
{ 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
|
||||
{ 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */
|
||||
{ 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */
|
||||
{ 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */
|
||||
{ 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */
|
||||
{ 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */
|
||||
{ 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */
|
||||
{ 17, 16, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */
|
||||
{ 17, 16, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
|
||||
{ 17, 16, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
|
||||
{ 17, 16, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
|
||||
{ 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */
|
||||
{ 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */
|
||||
{ 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/
|
||||
{ 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
|
||||
{ 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/
|
||||
{ 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/
|
||||
{ 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/
|
||||
{ 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/
|
||||
{ 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/
|
||||
{ 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/
|
||||
{ 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
|
||||
{ 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/
|
||||
},
|
||||
{ /* for srcSize <= 16 KB */
|
||||
/* W, C, H, S, L, T, strat */
|
||||
{ 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
|
||||
{ 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */
|
||||
{ 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */
|
||||
{ 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */
|
||||
{ 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */
|
||||
{ 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/
|
||||
{ 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */
|
||||
{ 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */
|
||||
{ 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/
|
||||
{ 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/
|
||||
{ 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/
|
||||
{ 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/
|
||||
{ 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/
|
||||
{ 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/
|
||||
{ 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/
|
||||
{ 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/
|
||||
{ 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/
|
||||
{ 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/
|
||||
{ 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/
|
||||
{ 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
|
||||
{ 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/
|
||||
{ 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
|
||||
{ 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif /* ZSTD_CLEVELS_H */
|
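clevels.h moves the per-level parameter tables out of zstd_compress.c: the outer index selects one of four tables by source-size bucket and the inner index is the clamped compression level, which is roughly how ZSTD_getCParams() resolves parameters. A reduced sketch of that two-step lookup (the tiny windowLog-only table below is a stand-in for the real tables above):

    #include <stdio.h>

    /* Reduced stand-in for ZSTD_defaultCParameters: [srcSize bucket][level] -> windowLog only. */
    #define MAX_CLEVEL 3
    static const unsigned windowLogTable[4][MAX_CLEVEL + 1] = {
        { 19, 19, 20, 21 },   /* any srcSize > 256 KB (or unknown) */
        { 18, 18, 18, 18 },   /* srcSize <= 256 KB */
        { 17, 17, 17, 17 },   /* srcSize <= 128 KB */
        { 14, 14, 14, 14 },   /* srcSize <= 16 KB */
    };

    /* Pick the srcSize bucket, then clamp the level into the table. */
    static unsigned pick_windowLog(int level, unsigned long long srcSize)
    {
        int const bucket = (srcSize == 0)          ? 0   /* unknown size: use the default table */
                         : (srcSize <= 16  * 1024) ? 3
                         : (srcSize <= 128 * 1024) ? 2
                         : (srcSize <= 256 * 1024) ? 1 : 0;
        if (level < 1) level = 1;
        if (level > MAX_CLEVEL) level = MAX_CLEVEL;
        return windowLogTable[bucket][level];
    }

    int main(void)
    {
        printf("%u %u\n", pick_windowLog(3, 100 * 1024), pick_windowLog(22, 0));  /* 17, 21 */
        return 0;
    }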
@ -75,13 +75,14 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
||||
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
|
||||
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
|
||||
U32 const step = FSE_TABLESTEP(tableSize);
|
||||
U32 const maxSV1 = maxSymbolValue+1;
|
||||
|
||||
U32* cumul = (U32*)workSpace;
|
||||
FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
|
||||
U16* cumul = (U16*)workSpace; /* size = maxSV1 */
|
||||
FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1)); /* size = tableSize */
|
||||
|
||||
U32 highThreshold = tableSize-1;
|
||||
|
||||
if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */
|
||||
assert(((size_t)workSpace & 1) == 0); /* Must be 2 bytes-aligned */
|
||||
if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
|
||||
/* CTable header */
|
||||
tableU16[-2] = (U16) tableLog;
|
||||
@ -98,20 +99,61 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
||||
/* symbol start positions */
|
||||
{ U32 u;
|
||||
cumul[0] = 0;
|
||||
for (u=1; u <= maxSymbolValue+1; u++) {
|
||||
for (u=1; u <= maxSV1; u++) {
|
||||
if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
|
||||
cumul[u] = cumul[u-1] + 1;
|
||||
tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
|
||||
} else {
|
||||
cumul[u] = cumul[u-1] + normalizedCounter[u-1];
|
||||
assert(normalizedCounter[u-1] >= 0);
|
||||
cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
|
||||
assert(cumul[u] >= cumul[u-1]); /* no overflow */
|
||||
} }
|
||||
cumul[maxSymbolValue+1] = tableSize+1;
|
||||
cumul[maxSV1] = (U16)(tableSize+1);
|
||||
}
|
||||
|
||||
/* Spread symbols */
|
||||
{ U32 position = 0;
|
||||
if (highThreshold == tableSize - 1) {
|
||||
/* Case for no low prob count symbols. Lay down 8 bytes at a time
|
||||
* to reduce branch misses since we are operating on a small block
|
||||
*/
|
||||
BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */
|
||||
{ U64 const add = 0x0101010101010101ull;
|
||||
size_t pos = 0;
|
||||
U64 sv = 0;
|
||||
U32 s;
|
||||
for (s=0; s<maxSV1; ++s, sv += add) {
|
||||
int i;
|
||||
int const n = normalizedCounter[s];
|
||||
MEM_write64(spread + pos, sv);
|
||||
for (i = 8; i < n; i += 8) {
|
||||
MEM_write64(spread + pos + i, sv);
|
||||
}
|
||||
assert(n>=0);
|
||||
pos += (size_t)n;
|
||||
}
|
||||
}
|
||||
/* Spread symbols across the table. Lack of lowprob symbols means that
|
||||
* we don't need variable sized inner loop, so we can unroll the loop and
|
||||
* reduce branch misses.
|
||||
*/
|
||||
{ size_t position = 0;
|
||||
size_t s;
|
||||
size_t const unroll = 2; /* Experimentally determined optimal unroll */
|
||||
assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
|
||||
for (s = 0; s < (size_t)tableSize; s += unroll) {
|
||||
size_t u;
|
||||
for (u = 0; u < unroll; ++u) {
|
||||
size_t const uPosition = (position + (u * step)) & tableMask;
|
||||
tableSymbol[uPosition] = spread[s + u];
|
||||
}
|
||||
position = (position + (unroll * step)) & tableMask;
|
||||
}
|
||||
assert(position == 0); /* Must have initialized all positions */
|
||||
}
|
||||
} else {
|
||||
U32 position = 0;
|
||||
U32 symbol;
|
||||
for (symbol=0; symbol<=maxSymbolValue; symbol++) {
|
||||
for (symbol=0; symbol<maxSV1; symbol++) {
|
||||
int nbOccurrences;
|
||||
int const freq = normalizedCounter[symbol];
|
||||
for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
|
||||
@ -120,7 +162,6 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
||||
while (position > highThreshold)
|
||||
position = (position + step) & tableMask; /* Low proba area */
|
||||
} }
|
||||
|
||||
assert(position==0); /* Must have initialized all positions */
|
||||
}
|
||||
|
||||
@ -144,16 +185,17 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
||||
case -1:
|
||||
case 1:
|
||||
symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
|
||||
symbolTT[s].deltaFindState = total - 1;
|
||||
assert(total <= INT_MAX);
|
||||
symbolTT[s].deltaFindState = (int)(total - 1);
|
||||
total ++;
|
||||
break;
|
||||
default :
|
||||
{
|
||||
U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
|
||||
U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
|
||||
assert(normalizedCounter[s] > 1);
|
||||
{ U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1);
|
||||
U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
|
||||
symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
|
||||
symbolTT[s].deltaFindState = total - normalizedCounter[s];
|
||||
total += normalizedCounter[s];
|
||||
symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
|
||||
total += (unsigned)normalizedCounter[s];
|
||||
} } } }
|
||||
|
||||
#if 0 /* debug : symbol costs */
|
||||
@ -164,8 +206,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
||||
symbol, normalizedCounter[symbol],
|
||||
FSE_getMaxNbBits(symbolTT, symbol),
|
||||
(double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
|
||||
}
|
||||
}
|
||||
} }
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
@ -173,16 +214,18 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
||||
|
||||
|
||||
|
||||
|
||||
#ifndef FSE_COMMONDEFS_ONLY
|
||||
|
||||
|
||||
/*-**************************************************************
|
||||
* FSE NCount encoding
|
||||
****************************************************************/
|
||||
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
|
||||
{
|
||||
size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
|
||||
size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
|
||||
+ 4 /* bitCount initialized at 4 */
|
||||
+ 2 /* first two symbols may use one additional bit each */) / 8)
|
||||
+ 1 /* round up to whole nb bytes */
|
||||
+ 2 /* additional two bytes for bitstream flush */;
|
||||
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
|
||||
}
|
||||
|
||||
|
@ -50,6 +50,28 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
|
||||
/* *******************************************************
|
||||
* HUF : Huffman block compression
|
||||
*********************************************************/
|
||||
#define HUF_WORKSPACE_MAX_ALIGNMENT 8
|
||||
|
||||
static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
|
||||
{
|
||||
size_t const mask = align - 1;
|
||||
size_t const rem = (size_t)workspace & mask;
|
||||
size_t const add = (align - rem) & mask;
|
||||
BYTE* const aligned = (BYTE*)workspace + add;
|
||||
assert((align & (align - 1)) == 0); /* pow 2 */
|
||||
assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
|
||||
if (*workspaceSizePtr >= add) {
|
||||
assert(add < align);
|
||||
assert(((size_t)aligned & mask) == 0);
|
||||
*workspaceSizePtr -= add;
|
||||
return aligned;
|
||||
} else {
|
||||
*workspaceSizePtr = 0;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
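HUF_alignUpWorkspace() lets callers pass an arbitrarily aligned scratch buffer: the pointer is rounded up to the requested power-of-two alignment and the remaining size is shrunk to match, or NULL is returned if the buffer cannot absorb the adjustment. A small usage sketch with a deliberately misaligned start (names are local to the sketch):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Same rounding logic as HUF_alignUpWorkspace(); align must be a power of two. */
    static void* align_up_workspace(void* workspace, size_t* workspaceSizePtr, size_t align)
    {
        size_t const mask = align - 1;
        size_t const rem  = (uintptr_t)workspace & mask;
        size_t const add  = (align - rem) & mask;
        if (*workspaceSizePtr < add) { *workspaceSizePtr = 0; return NULL; }
        *workspaceSizePtr -= add;
        return (unsigned char*)workspace + add;
    }

    int main(void)
    {
        unsigned char buffer[64];
        size_t size = sizeof(buffer) - 1;
        void* ws = align_up_workspace(buffer + 1, &size, 8);  /* deliberately misaligned start */
        printf("aligned=%d remaining=%zu\n", (int)(((uintptr_t)ws % 8) == 0), size);
        return 0;
    }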
/* HUF_compressWeights() :
|
||||
* Same as FSE_compress(), but dedicated to huff0's weights compression.
|
||||
* The use case needs much less stack memory.
|
||||
@ -72,7 +94,7 @@ static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightT
|
||||
|
||||
unsigned maxSymbolValue = HUF_TABLELOG_MAX;
|
||||
U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
|
||||
HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace;
|
||||
HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
|
||||
|
||||
if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
|
||||
|
||||
@ -103,6 +125,40 @@ static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightT
|
||||
return (size_t)(op-ostart);
|
||||
}
|
||||
|
||||
static size_t HUF_getNbBits(HUF_CElt elt)
|
||||
{
|
||||
return elt & 0xFF;
|
||||
}
|
||||
|
||||
static size_t HUF_getNbBitsFast(HUF_CElt elt)
|
||||
{
|
||||
return elt;
|
||||
}
|
||||
|
||||
static size_t HUF_getValue(HUF_CElt elt)
|
||||
{
|
||||
return elt & ~0xFF;
|
||||
}
|
||||
|
||||
static size_t HUF_getValueFast(HUF_CElt elt)
|
||||
{
|
||||
return elt;
|
||||
}
|
||||
|
||||
static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
|
||||
{
|
||||
assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
|
||||
*elt = nbBits;
|
||||
}
|
||||
|
||||
static void HUF_setValue(HUF_CElt* elt, size_t value)
|
||||
{
|
||||
size_t const nbBits = HUF_getNbBits(*elt);
|
||||
if (nbBits > 0) {
|
||||
assert((value >> nbBits) == 0);
|
||||
*elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
HUF_CompressWeightsWksp wksp;
|
||||
@ -114,9 +170,10 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
|
||||
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
|
||||
void* workspace, size_t workspaceSize)
|
||||
{
|
||||
HUF_CElt const* const ct = CTable + 1;
|
||||
BYTE* op = (BYTE*)dst;
|
||||
U32 n;
|
||||
HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace;
|
||||
HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
|
||||
|
||||
/* check conditions */
|
||||
if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
|
||||
@ -127,9 +184,10 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
|
||||
for (n=1; n<huffLog+1; n++)
|
||||
wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
|
||||
for (n=0; n<maxSymbolValue; n++)
|
||||
wksp->huffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits];
|
||||
wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];
|
||||
|
||||
/* attempt weights compression by FSE */
|
||||
if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
|
||||
{ CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
|
||||
if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */
|
||||
op[0] = (BYTE)hSize;
|
||||
@ -163,6 +221,7 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
|
||||
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
|
||||
U32 tableLog = 0;
|
||||
U32 nbSymbols = 0;
|
||||
HUF_CElt* const ct = CTable + 1;
|
||||
|
||||
/* get symbol weights */
|
||||
CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
|
||||
@ -172,6 +231,8 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
|
||||
if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
|
||||
if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
|
||||
|
||||
CTable[0] = tableLog;
|
||||
|
||||
/* Prepare base value per rank */
|
||||
{ U32 n, nextRankStart = 0;
|
||||
for (n=1; n<=tableLog; n++) {
|
||||
@ -183,13 +244,13 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
|
||||
/* fill nbBits */
|
||||
{ U32 n; for (n=0; n<nbSymbols; n++) {
|
||||
const U32 w = huffWeight[n];
|
||||
CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
|
||||
HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
|
||||
} }
|
||||
|
||||
/* fill val */
|
||||
{ U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */
|
||||
U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
|
||||
{ U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
|
||||
{ U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
|
||||
/* determine stating value per rank */
|
||||
valPerRank[tableLog+1] = 0; /* for w==0 */
|
||||
{ U16 min = 0;
|
||||
@ -199,18 +260,18 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
|
||||
min >>= 1;
|
||||
} }
|
||||
/* assign value within rank, symbol order */
|
||||
{ U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
|
||||
{ U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
|
||||
}
|
||||
|
||||
*maxSymbolValuePtr = nbSymbols - 1;
|
||||
return readSize;
|
||||
}
|
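Editorial worked example (not part of the upstream patch) of the canonical value assignment at the end of HUF_readCTable() above: take tableLog = 3 and four symbols with nbBits = {1, 2, 3, 3}, so nbPerRank[1..3] = {1, 1, 2}. Walking ranks from 3 down to 1 with valPerRank[n] = min; min += nbPerRank[n]; min >>= 1; gives valPerRank[3] = 0, valPerRank[2] = 1 and valPerRank[1] = 1. Assigning values in symbol order then yields the codes 1, 01, 000 and 001, a valid prefix-free code in which shorter codes take the numerically larger values.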
||||
|
||||
U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
|
||||
U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
|
||||
{
|
||||
const HUF_CElt* table = (const HUF_CElt*)symbolTable;
|
||||
const HUF_CElt* ct = CTable + 1;
|
||||
assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
|
||||
return table[symbolValue].nbBits;
|
||||
return (U32)HUF_getNbBits(ct[symbolValue]);
|
||||
}
|
||||
|
||||
|
||||
@ -364,22 +425,118 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
U32 base;
|
||||
U32 curr;
|
||||
U16 base;
|
||||
U16 curr;
|
||||
} rankPos;
|
||||
|
||||
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
|
||||
|
||||
#define RANK_POSITION_TABLE_SIZE 32
|
||||
/* Number of buckets available for HUF_sort() */
|
||||
#define RANK_POSITION_TABLE_SIZE 192
|
||||
|
||||
typedef struct {
|
||||
huffNodeTable huffNodeTbl;
|
||||
rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
|
||||
} HUF_buildCTable_wksp_tables;
|
||||
|
/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
 * Strategy is to use as many buckets as possible for representing distinct
 * counts while using the remainder to represent all "large" counts.
 *
 * To satisfy this requirement for 192 buckets, we can do the following:
 * Let buckets 0-166 represent distinct counts of [0, 166]
 * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
 */
#define RANK_POSITION_MAX_COUNT_LOG 32
#define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */

/* Return the appropriate bucket index for a given count. See definition of
 * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
 */
static U32 HUF_getIndex(U32 const count) {
    return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
        ? count
        : BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
}
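For illustration only (not from the upstream diff), a minimal standalone sketch of how this bucketing spreads symbol counts over the 192 sort buckets, assuming the derived constants above (cutoff 166, log buckets starting at 158):

/* Editorial sketch of the bucketing rule above; standalone, not the kernel code. */
#include <stdio.h>

#define CUTOFF            166   /* assumed value of RANK_POSITION_DISTINCT_COUNT_CUTOFF */
#define LOG_BUCKETS_BEGIN 158   /* assumed value of RANK_POSITION_LOG_BUCKETS_BEGIN */

static unsigned highbit32(unsigned v)   /* stand-in for BIT_highbit32(): index of highest set bit */
{
    unsigned r = 0;
    while (v >>= 1) r++;
    return r;
}

static unsigned getIndex(unsigned count)
{
    return (count < CUTOFF) ? count : highbit32(count) + LOG_BUCKETS_BEGIN;
}

int main(void)
{
    /* Small counts keep their own bucket; large counts collapse into log2 buckets. */
    printf("%u %u %u %u\n", getIndex(3), getIndex(165), getIndex(4096), getIndex(1u << 31));
    /* prints: 3 165 170 189, always below RANK_POSITION_TABLE_SIZE (192) */
    return 0;
}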
||||
|
||||
/* Helper swap function for HUF_quickSortPartition() */
|
||||
static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
|
||||
nodeElt tmp = *a;
|
||||
*a = *b;
|
||||
*b = tmp;
|
||||
}
|
||||
|
||||
/* Returns 0 if the huffNode array is not sorted by descending count */
|
||||
MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
|
||||
U32 i;
|
||||
for (i = 1; i < maxSymbolValue1; ++i) {
|
||||
if (huffNode[i].count > huffNode[i-1].count) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Insertion sort by descending order */
|
||||
HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
|
||||
int i;
|
||||
int const size = high-low+1;
|
||||
huffNode += low;
|
||||
for (i = 1; i < size; ++i) {
|
||||
nodeElt const key = huffNode[i];
|
||||
int j = i - 1;
|
||||
while (j >= 0 && huffNode[j].count < key.count) {
|
||||
huffNode[j + 1] = huffNode[j];
|
||||
j--;
|
||||
}
|
||||
huffNode[j + 1] = key;
|
||||
}
|
||||
}
|
||||
|
||||
/* Pivot helper function for quicksort. */
|
||||
static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
|
||||
/* Simply select rightmost element as pivot. "Better" selectors like
|
||||
* median-of-three don't experimentally appear to have any benefit.
|
||||
*/
|
||||
U32 const pivot = arr[high].count;
|
||||
int i = low - 1;
|
||||
int j = low;
|
||||
for ( ; j < high; j++) {
|
||||
if (arr[j].count > pivot) {
|
||||
i++;
|
||||
HUF_swapNodes(&arr[i], &arr[j]);
|
||||
}
|
||||
}
|
||||
HUF_swapNodes(&arr[i + 1], &arr[high]);
|
||||
return i + 1;
|
||||
}
|
||||
|
||||
/* Classic quicksort by descending with partially iterative calls
|
||||
* to reduce worst case callstack size.
|
||||
*/
|
||||
static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
|
||||
int const kInsertionSortThreshold = 8;
|
||||
if (high - low < kInsertionSortThreshold) {
|
||||
HUF_insertionSort(arr, low, high);
|
||||
return;
|
||||
}
|
||||
while (low < high) {
|
||||
int const idx = HUF_quickSortPartition(arr, low, high);
|
||||
if (idx - low < high - idx) {
|
||||
HUF_simpleQuickSort(arr, low, idx - 1);
|
||||
low = idx + 1;
|
||||
} else {
|
||||
HUF_simpleQuickSort(arr, idx + 1, high);
|
||||
high = idx - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
/*
 * HUF_sort():
 * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
 * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
 *
 * @param[out] huffNode       Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
 *                            Must have (maxSymbolValue + 1) entries.
@@ -387,44 +544,52 @@ typedef struct {
 * @param[in]  maxSymbolValue Maximum symbol value.
 * @param      rankPosition   This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
 */
||||
static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
|
||||
{
|
||||
int n;
|
||||
int const maxSymbolValue1 = (int)maxSymbolValue + 1;
|
||||
static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
|
||||
U32 n;
|
||||
U32 const maxSymbolValue1 = maxSymbolValue+1;
|
||||
|
||||
/* Compute base and set curr to base.
|
||||
* For symbol s let lowerRank = BIT_highbit32(count[n]+1) and rank = lowerRank + 1.
|
||||
* Then 2^lowerRank <= count[n]+1 <= 2^rank.
|
||||
* For symbol s let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1.
|
||||
* See HUF_getIndex to see bucketing strategy.
|
||||
* We attribute each symbol to lowerRank's base value, because we want to know where
|
||||
* each rank begins in the output, so for rank R we want to count ranks R+1 and above.
|
||||
*/
|
||||
ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
|
||||
for (n = 0; n < maxSymbolValue1; ++n) {
|
||||
U32 lowerRank = BIT_highbit32(count[n] + 1);
|
||||
U32 lowerRank = HUF_getIndex(count[n]);
|
||||
assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
|
||||
rankPosition[lowerRank].base++;
|
||||
}
|
||||
|
||||
assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
|
||||
/* Set up the rankPosition table */
|
||||
for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
|
||||
rankPosition[n-1].base += rankPosition[n].base;
|
||||
rankPosition[n-1].curr = rankPosition[n-1].base;
|
||||
}
|
||||
/* Sort */
|
||||
|
||||
/* Insert each symbol into their appropriate bucket, setting up rankPosition table. */
|
||||
for (n = 0; n < maxSymbolValue1; ++n) {
|
||||
U32 const c = count[n];
|
||||
U32 const r = BIT_highbit32(c+1) + 1;
|
||||
U32 pos = rankPosition[r].curr++;
|
||||
/* Insert into the correct position in the rank.
|
||||
* We have at most 256 symbols, so this insertion should be fine.
|
||||
*/
|
||||
while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
|
||||
huffNode[pos] = huffNode[pos-1];
|
||||
pos--;
|
||||
}
|
||||
U32 const r = HUF_getIndex(c) + 1;
|
||||
U32 const pos = rankPosition[r].curr++;
|
||||
assert(pos < maxSymbolValue1);
|
||||
huffNode[pos].count = c;
|
||||
huffNode[pos].byte = (BYTE)n;
|
||||
}
|
||||
}
|
||||
|
||||
/* Sort each bucket. */
|
||||
for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
|
||||
U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base;
|
||||
U32 const bucketStartIdx = rankPosition[n].base;
|
||||
if (bucketSize > 1) {
|
||||
assert(bucketStartIdx < maxSymbolValue1);
|
||||
HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
|
||||
}
|
||||
}
|
||||
|
||||
assert(HUF_isSorted(huffNode, maxSymbolValue1));
|
||||
}
|
||||
|
||||
/* HUF_buildCTable_wksp() :
|
||||
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
|
||||
@ -487,6 +652,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
|
||||
*/
|
||||
static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
|
||||
{
|
||||
HUF_CElt* const ct = CTable + 1;
|
||||
/* fill result into ctable (val, nbBits) */
|
||||
int n;
|
||||
U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
|
||||
@ -502,20 +668,20 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i
|
||||
min >>= 1;
|
||||
} }
|
||||
for (n=0; n<alphabetSize; n++)
|
||||
CTable[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
|
||||
HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */
|
||||
for (n=0; n<alphabetSize; n++)
|
||||
CTable[n].val = valPerRank[CTable[n].nbBits]++; /* assign value within rank, symbol order */
|
||||
HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); /* assign value within rank, symbol order */
|
||||
CTable[0] = maxNbBits;
|
||||
}
|
||||
|
||||
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
|
||||
size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
|
||||
{
|
||||
HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
|
||||
HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
|
||||
nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
|
||||
nodeElt* const huffNode = huffNode0+1;
|
||||
int nonNullRank;
|
||||
|
||||
/* safety checks */
|
||||
if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
|
||||
if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
|
||||
return ERROR(workSpace_tooSmall);
|
||||
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
|
||||
@ -533,99 +699,334 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
|
||||
maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
|
||||
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
|
||||
|
||||
HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
|
||||
HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
|
||||
|
||||
return maxNbBits;
|
||||
}
|
||||
|
||||
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
|
||||
{
|
||||
HUF_CElt const* ct = CTable + 1;
|
||||
size_t nbBits = 0;
|
||||
int s;
|
||||
for (s = 0; s <= (int)maxSymbolValue; ++s) {
|
||||
nbBits += CTable[s].nbBits * count[s];
|
||||
nbBits += HUF_getNbBits(ct[s]) * count[s];
|
||||
}
|
||||
return nbBits >> 3;
|
||||
}
|
||||
|
||||
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
|
||||
HUF_CElt const* ct = CTable + 1;
|
||||
int bad = 0;
|
||||
int s;
|
||||
for (s = 0; s <= (int)maxSymbolValue; ++s) {
|
||||
bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
|
||||
bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
|
||||
}
|
||||
return !bad;
|
||||
}
|
||||
|
||||
size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
|
||||
|
||||
FORCE_INLINE_TEMPLATE void
|
||||
HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
|
/* HUF_CStream_t:
 * Huffman uses its own BIT_CStream_t implementation.
 * There are three major differences from BIT_CStream_t:
 *   1. HUF_addBits() takes a HUF_CElt (size_t) which is
 *      the pair (nbBits, value) in the format:
 *      format:
 *        - Bits [0, 4)            = nbBits
 *        - Bits [4, 64 - nbBits)  = 0
 *        - Bits [64 - nbBits, 64) = value
 *   2. The bitContainer is built from the upper bits and
 *      right shifted. E.g. to add a new value of N bits
 *      you right shift the bitContainer by N, then or in
 *      the new value into the N upper bits.
 *   3. The bitstream has two bit containers. You can add
 *      bits to the second container and merge them into
 *      the first container.
 */

#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)

typedef struct {
    size_t bitContainer[2];
    size_t bitPos[2];

    BYTE* startPtr;
    BYTE* ptr;
    BYTE* endPtr;
} HUF_CStream_t;
||||
|
||||
/*! HUF_initCStream():
|
||||
* Initializes the bitstream.
|
||||
* @returns 0 or an error code.
|
||||
*/
|
||||
static size_t HUF_initCStream(HUF_CStream_t* bitC,
|
||||
void* startPtr, size_t dstCapacity)
|
||||
{
|
||||
BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
|
||||
ZSTD_memset(bitC, 0, sizeof(*bitC));
|
||||
bitC->startPtr = (BYTE*)startPtr;
|
||||
bitC->ptr = bitC->startPtr;
|
||||
bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
|
||||
if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HUF_FLUSHBITS(s) BIT_flushBits(s)
|
/*! HUF_addBits():
 * Adds the symbol stored in HUF_CElt elt to the bitstream.
 *
 * @param elt   The element we're adding. This is a (nbBits, value) pair.
 *              See the HUF_CStream_t docs for the format.
 * @param idx   Insert into the bitstream at this idx.
 * @param kFast This is a template parameter. If the bitstream is guaranteed
 *              to have at least 4 unused bits after this call it may be 1,
 *              otherwise it must be 0. HUF_addBits() is faster when fast is set.
 */
||||
FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
|
||||
{
|
||||
assert(idx <= 1);
|
||||
assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
|
||||
/* This is efficient on x86-64 with BMI2 because shrx
|
||||
* only reads the low 6 bits of the register. The compiler
|
||||
* knows this and elides the mask. When fast is set,
|
||||
* every operation can use the same value loaded from elt.
|
||||
*/
|
||||
bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
|
||||
bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
|
||||
/* We only read the low 8 bits of bitC->bitPos[idx] so it
|
||||
* doesn't matter that the high bits have noise from the value.
|
||||
*/
|
||||
bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
|
||||
assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
|
||||
/* The last 4-bits of elt are dirty if fast is set,
|
||||
* so we must not be overwriting bits that have already been
|
||||
* inserted into the bit container.
|
||||
*/
|
||||
#if DEBUGLEVEL >= 1
|
||||
{
|
||||
size_t const nbBits = HUF_getNbBits(elt);
|
||||
size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1;
|
||||
(void)dirtyBits;
|
||||
/* Middle bits are 0. */
|
||||
assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
|
||||
/* We didn't overwrite any bits in the bit container. */
|
||||
assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
|
||||
(void)dirtyBits;
|
||||
}
|
||||
#endif
|
||||
}
|
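A minimal editorial sketch (not upstream code) of the (nbBits, value) packing and the add step described above, assuming a 64-bit container: the value is left-aligned, nbBits sits in the low 4 bits, and adding a symbol is a right shift of the container followed by an OR of the element's high bits.

/* Editorial sketch of the HUF_CElt layout and the HUF_addBits() idea; not the kernel implementation. */
#include <assert.h>
#include <stdint.h>

typedef uint64_t CElt;                          /* stand-in for HUF_CElt */

static CElt makeElt(uint64_t value, unsigned nbBits)
{
    assert(nbBits > 0 && nbBits <= 12 && (value >> nbBits) == 0);
    return (value << (64 - nbBits)) | nbBits;   /* value in the top bits, nbBits in bits [0,4) */
}

static void addBits(uint64_t* container, unsigned* bitPos, CElt elt)
{
    unsigned const nbBits = (unsigned)(elt & 0xF);
    *container >>= nbBits;                      /* make room at the top */
    *container |= elt & ~(CElt)0xF;             /* OR the left-aligned value into the freed top bits */
    *bitPos += nbBits;
}

int main(void)
{
    uint64_t container = 0; unsigned bitPos = 0;
    addBits(&container, &bitPos, makeElt(0x5, 3));   /* add 0b101 */
    addBits(&container, &bitPos, makeElt(0x3, 2));   /* add 0b11  */
    /* The newest symbol sits in the top bits: the top 5 bits are now 11 101. */
    assert(bitPos == 5 && (container >> 59) == 0x1D);
    return 0;
}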
||||
|
||||
#define HUF_FLUSHBITS_1(stream) \
|
||||
if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
|
||||
FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
|
||||
{
|
||||
bitC->bitContainer[1] = 0;
|
||||
bitC->bitPos[1] = 0;
|
||||
}
|
||||
|
||||
/*! HUF_mergeIndex1() :
|
||||
* Merges the bit container @ index 1 into the bit container @ index 0
|
||||
* and zeros the bit container @ index 1.
|
||||
*/
|
||||
FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
|
||||
{
|
||||
assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
|
||||
bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
|
||||
bitC->bitContainer[0] |= bitC->bitContainer[1];
|
||||
bitC->bitPos[0] += bitC->bitPos[1];
|
||||
assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
|
||||
}
|
||||
|
||||
/*! HUF_flushBits() :
|
||||
* Flushes the bits in the bit container @ index 0.
|
||||
*
|
||||
* @post bitPos will be < 8.
|
||||
* @param kFast If kFast is set then we must know a-priori that
|
||||
* the bit container will not overflow.
|
||||
*/
|
||||
FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
|
||||
{
|
||||
/* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
|
||||
size_t const nbBits = bitC->bitPos[0] & 0xFF;
|
||||
size_t const nbBytes = nbBits >> 3;
|
||||
/* The top nbBits bits of bitContainer are the ones we need. */
|
||||
size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
|
||||
/* Mask bitPos to account for the bytes we consumed. */
|
||||
bitC->bitPos[0] &= 7;
|
||||
assert(nbBits > 0);
|
||||
assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
|
||||
assert(bitC->ptr <= bitC->endPtr);
|
||||
MEM_writeLEST(bitC->ptr, bitContainer);
|
||||
bitC->ptr += nbBytes;
|
||||
assert(!kFast || bitC->ptr <= bitC->endPtr);
|
||||
if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
|
||||
/* bitContainer doesn't need to be modified because the leftover
|
||||
* bits are already the top bitPos bits. And we don't care about
|
||||
* noise in the lower values.
|
||||
*/
|
||||
}
|
||||
|
||||
/*! HUF_endMark()
|
||||
* @returns The Huffman stream end mark: A 1-bit value = 1.
|
||||
*/
|
||||
static HUF_CElt HUF_endMark(void)
|
||||
{
|
||||
HUF_CElt endMark;
|
||||
HUF_setNbBits(&endMark, 1);
|
||||
HUF_setValue(&endMark, 1);
|
||||
return endMark;
|
||||
}
|
||||
|
||||
/*! HUF_closeCStream() :
|
||||
* @return Size of CStream, in bytes,
|
||||
* or 0 if it could not fit into dstBuffer */
|
||||
static size_t HUF_closeCStream(HUF_CStream_t* bitC)
|
||||
{
|
||||
HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
|
||||
HUF_flushBits(bitC, /* kFast */ 0);
|
||||
{
|
||||
size_t const nbBits = bitC->bitPos[0] & 0xFF;
|
||||
if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
|
||||
return (bitC->ptr - bitC->startPtr) + (nbBits > 0);
|
||||
}
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE void
|
||||
HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
|
||||
{
|
||||
HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE void
|
||||
HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
|
||||
const BYTE* ip, size_t srcSize,
|
||||
const HUF_CElt* ct,
|
||||
int kUnroll, int kFastFlush, int kLastFast)
|
||||
{
|
||||
/* Join to kUnroll */
|
||||
int n = (int)srcSize;
|
||||
int rem = n % kUnroll;
|
||||
if (rem > 0) {
|
||||
for (; rem > 0; --rem) {
|
||||
HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
|
||||
}
|
||||
HUF_flushBits(bitC, kFastFlush);
|
||||
}
|
||||
assert(n % kUnroll == 0);
|
||||
|
||||
/* Join to 2 * kUnroll */
|
||||
if (n % (2 * kUnroll)) {
|
||||
int u;
|
||||
for (u = 1; u < kUnroll; ++u) {
|
||||
HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
|
||||
}
|
||||
HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
|
||||
HUF_flushBits(bitC, kFastFlush);
|
||||
n -= kUnroll;
|
||||
}
|
||||
assert(n % (2 * kUnroll) == 0);
|
||||
|
||||
for (; n>0; n-= 2 * kUnroll) {
|
||||
/* Encode kUnroll symbols into the bitstream @ index 0. */
|
||||
int u;
|
||||
for (u = 1; u < kUnroll; ++u) {
|
||||
HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
|
||||
}
|
||||
HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
|
||||
HUF_flushBits(bitC, kFastFlush);
|
||||
/* Encode kUnroll symbols into the bitstream @ index 1.
|
||||
* This allows us to start filling the bit container
|
||||
* without any data dependencies.
|
||||
*/
|
||||
HUF_zeroIndex1(bitC);
|
||||
for (u = 1; u < kUnroll; ++u) {
|
||||
HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
|
||||
}
|
||||
HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
|
||||
/* Merge bitstream @ index 1 into the bitstream @ index 0 */
|
||||
HUF_mergeIndex1(bitC);
|
||||
HUF_flushBits(bitC, kFastFlush);
|
||||
}
|
||||
assert(n == 0);
|
||||
|
||||
}
|
||||
|
/*
 * Returns a tight upper bound on the output space needed by Huffman
 * with 8 bytes buffer to handle over-writes. If the output is at least
 * this large we don't need to do bounds checks during Huffman encoding.
 */
static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
{
    return ((srcSize * tableLog) >> 3) + 8;
}
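Editorial sanity check of the bound above (not from the patch): each of srcSize symbols costs at most tableLog bits, so the payload needs at most (srcSize * tableLog) >> 3 bytes, and the extra 8 bytes absorb the final size_t-wide flush that may write past the last useful byte. For srcSize = 4096 and tableLog = 11 this gives ((4096 * 11) >> 3) + 8 = 5632 + 8 = 5640 bytes.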
||||
|
||||
#define HUF_FLUSHBITS_2(stream) \
|
||||
if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
|
||||
|
||||
FORCE_INLINE_TEMPLATE size_t
|
||||
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
|
||||
const void* src, size_t srcSize,
|
||||
const HUF_CElt* CTable)
|
||||
{
|
||||
U32 const tableLog = (U32)CTable[0];
|
||||
HUF_CElt const* ct = CTable + 1;
|
||||
const BYTE* ip = (const BYTE*) src;
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
BYTE* const oend = ostart + dstSize;
|
||||
BYTE* op = ostart;
|
||||
size_t n;
|
||||
BIT_CStream_t bitC;
|
||||
HUF_CStream_t bitC;
|
||||
|
||||
/* init */
|
||||
if (dstSize < 8) return 0; /* not enough space to compress */
|
||||
{ size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op));
|
||||
{ size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
|
||||
if (HUF_isError(initErr)) return 0; }
|
||||
|
||||
n = srcSize & ~3; /* join to mod 4 */
|
||||
switch (srcSize & 3)
|
||||
{
|
||||
case 3:
|
||||
HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
|
||||
HUF_FLUSHBITS_2(&bitC);
|
||||
ZSTD_FALLTHROUGH;
|
||||
case 2:
|
||||
HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
|
||||
HUF_FLUSHBITS_1(&bitC);
|
||||
ZSTD_FALLTHROUGH;
|
||||
case 1:
|
||||
HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
|
||||
HUF_FLUSHBITS(&bitC);
|
||||
ZSTD_FALLTHROUGH;
|
||||
case 0: ZSTD_FALLTHROUGH;
|
||||
default: break;
|
||||
if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
|
||||
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
|
||||
else {
|
||||
if (MEM_32bits()) {
|
||||
switch (tableLog) {
|
||||
case 11:
|
||||
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
|
||||
break;
|
||||
case 10: ZSTD_FALLTHROUGH;
|
||||
case 9: ZSTD_FALLTHROUGH;
|
||||
case 8:
|
||||
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
|
||||
break;
|
||||
case 7: ZSTD_FALLTHROUGH;
|
||||
default:
|
||||
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (tableLog) {
|
||||
case 11:
|
||||
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
|
||||
break;
|
||||
case 10:
|
||||
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
|
||||
break;
|
||||
case 9:
|
||||
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
|
||||
break;
|
||||
case 8:
|
||||
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
|
||||
break;
|
||||
case 7:
|
||||
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
|
||||
break;
|
||||
case 6: ZSTD_FALLTHROUGH;
|
||||
default:
|
||||
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
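/* Editorial observation (not a statement from the patch): with a 64-bit container a flush
 * leaves at most 7 bits behind, so at most 57 fresh bits fit between flushes. The kUnroll
 * values above appear to be sized so that kUnroll * tableLog stays within that budget, e.g.
 * 5*11 = 55, 5*10 = 50, 6*9 = 54, 7*8 = 56, 8*7 = 56 and 9*6 = 54 bits per flush.
 */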
||||
assert(bitC.ptr <= bitC.endPtr);
|
||||
|
||||
for (; n>0; n-=4) { /* note : n&3==0 at this stage */
|
||||
HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
|
||||
HUF_FLUSHBITS_1(&bitC);
|
||||
HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
|
||||
HUF_FLUSHBITS_2(&bitC);
|
||||
HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
|
||||
HUF_FLUSHBITS_1(&bitC);
|
||||
HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
|
||||
HUF_FLUSHBITS(&bitC);
|
||||
}
|
||||
|
||||
return BIT_closeCStream(&bitC);
|
||||
return HUF_closeCStream(&bitC);
|
||||
}
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
|
||||
static TARGET_ATTRIBUTE("bmi2") size_t
|
||||
static BMI2_TARGET_ATTRIBUTE size_t
|
||||
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
|
||||
const void* src, size_t srcSize,
|
||||
const HUF_CElt* CTable)
|
||||
@ -667,9 +1068,13 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
|
||||
|
||||
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
|
||||
{
|
||||
return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
|
||||
return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
|
||||
}
|
||||
|
||||
size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
|
||||
{
|
||||
return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
|
||||
}
|
||||
|
||||
static size_t
|
||||
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
|
||||
@ -689,8 +1094,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
|
||||
|
||||
assert(op <= oend);
|
||||
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
|
||||
if (cSize==0) return 0;
|
||||
assert(cSize <= 65535);
|
||||
if (cSize == 0 || cSize > 65535) return 0;
|
||||
MEM_writeLE16(ostart, (U16)cSize);
|
||||
op += cSize;
|
||||
}
|
||||
@ -698,8 +1102,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
|
||||
ip += segmentSize;
|
||||
assert(op <= oend);
|
||||
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
|
||||
if (cSize==0) return 0;
|
||||
assert(cSize <= 65535);
|
||||
if (cSize == 0 || cSize > 65535) return 0;
|
||||
MEM_writeLE16(ostart+2, (U16)cSize);
|
||||
op += cSize;
|
||||
}
|
||||
@ -707,8 +1110,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
|
||||
ip += segmentSize;
|
||||
assert(op <= oend);
|
||||
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
|
||||
if (cSize==0) return 0;
|
||||
assert(cSize <= 65535);
|
||||
if (cSize == 0 || cSize > 65535) return 0;
|
||||
MEM_writeLE16(ostart+4, (U16)cSize);
|
||||
op += cSize;
|
||||
}
|
||||
@ -717,7 +1119,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
|
||||
assert(op <= oend);
|
||||
assert(ip <= iend);
|
||||
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
|
||||
if (cSize==0) return 0;
|
||||
if (cSize == 0 || cSize > 65535) return 0;
|
||||
op += cSize;
|
||||
}
|
||||
|
||||
@ -726,7 +1128,12 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
|
||||
|
||||
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
|
||||
{
|
||||
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
|
||||
return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
|
||||
}
|
||||
|
||||
size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
|
||||
{
|
||||
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
|
||||
}
|
||||
|
||||
typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
|
||||
@ -750,35 +1157,38 @@ static size_t HUF_compressCTable_internal(
|
||||
|
||||
typedef struct {
|
||||
unsigned count[HUF_SYMBOLVALUE_MAX + 1];
|
||||
HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
|
||||
HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
|
||||
union {
|
||||
HUF_buildCTable_wksp_tables buildCTable_wksp;
|
||||
HUF_WriteCTableWksp writeCTable_wksp;
|
||||
U32 hist_wksp[HIST_WKSP_SIZE_U32];
|
||||
} wksps;
|
||||
} HUF_compress_tables_t;
|
||||
|
||||
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
|
||||
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */
|
||||
|
||||
/* HUF_compress_internal() :
|
||||
* `workSpace_align4` must be aligned on 4-bytes boundaries,
|
||||
* and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */
|
||||
* and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
|
||||
static size_t
|
||||
HUF_compress_internal (void* dst, size_t dstSize,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned huffLog,
|
||||
HUF_nbStreams_e nbStreams,
|
||||
void* workSpace_align4, size_t wkspSize,
|
||||
void* workSpace, size_t wkspSize,
|
||||
HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
|
||||
const int bmi2)
|
||||
const int bmi2, unsigned suspectUncompressible)
|
||||
{
|
||||
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4;
|
||||
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
BYTE* const oend = ostart + dstSize;
|
||||
BYTE* op = ostart;
|
||||
|
||||
HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
|
||||
assert(((size_t)workSpace_align4 & 3) == 0); /* must be aligned on 4-bytes boundaries */
|
||||
HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
|
||||
|
||||
/* checks & inits */
|
||||
if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
|
||||
if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
|
||||
if (!srcSize) return 0; /* Uncompressed */
|
||||
if (!dstSize) return 0; /* cannot fit anything within dst budget */
|
||||
if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
|
||||
@ -794,8 +1204,23 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
||||
nbStreams, oldHufTable, bmi2);
|
||||
}
|
||||
|
    /* If uncompressible data is suspected, do a smaller sampling first */
    DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
    if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
        size_t largestTotal = 0;
        { unsigned maxSymbolValueBegin = maxSymbolValue;
          CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
          largestTotal += largestBegin;
        }
        { unsigned maxSymbolValueEnd = maxSymbolValue;
          CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
          largestTotal += largestEnd;
        }
        if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }
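/* Editorial worked example of the sampling heuristic above (not from the patch): with
 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE = 4096, the first and last 4 KiB are histogrammed and
 * the block is skipped when the most frequent symbol of each sample, summed over both
 * samples, covers at most ((2 * 4096) >> 7) + 4 = 64 + 4 = 68 of the 8192 sampled bytes,
 * i.e. under roughly 0.83%. This mirrors the existing (srcSize >> 7) + 4 check used on the
 * full-block histogram below.
 */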
||||
|
||||
/* Scan input and build symbol stats */
|
||||
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) );
|
||||
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
|
||||
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
|
||||
if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
|
||||
}
|
||||
@ -820,9 +1245,12 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
||||
&table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
|
||||
CHECK_F(maxBits);
|
||||
huffLog = (U32)maxBits;
|
||||
/* Zero unused symbols in CTable, so we can check it for validity */
|
||||
ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0,
|
||||
sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
|
||||
}
|
||||
/* Zero unused symbols in CTable, so we can check it for validity */
|
||||
{
|
||||
size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
|
||||
size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
|
||||
ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
|
||||
}
|
||||
|
||||
/* Write table description header */
|
||||
@ -859,19 +1287,20 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
|
||||
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
||||
maxSymbolValue, huffLog, HUF_singleStream,
|
||||
workSpace, wkspSize,
|
||||
NULL, NULL, 0, 0 /*bmi2*/);
|
||||
NULL, NULL, 0, 0 /*bmi2*/, 0);
|
||||
}
|
||||
|
||||
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned huffLog,
|
||||
void* workSpace, size_t wkspSize,
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
|
||||
int bmi2, unsigned suspectUncompressible)
|
||||
{
|
||||
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
||||
maxSymbolValue, huffLog, HUF_singleStream,
|
||||
workSpace, wkspSize, hufTable,
|
||||
repeat, preferRepeat, bmi2);
|
||||
repeat, preferRepeat, bmi2, suspectUncompressible);
|
||||
}
|
||||
|
||||
/* HUF_compress4X_repeat():
|
||||
@ -885,21 +1314,22 @@ size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
|
||||
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
||||
maxSymbolValue, huffLog, HUF_fourStreams,
|
||||
workSpace, wkspSize,
|
||||
NULL, NULL, 0, 0 /*bmi2*/);
|
||||
NULL, NULL, 0, 0 /*bmi2*/, 0);
|
||||
}
|
||||
|
||||
/* HUF_compress4X_repeat():
|
||||
* compress input using 4 streams.
|
||||
* consider skipping quickly
|
||||
* re-use an existing huffman compression table */
|
||||
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned huffLog,
|
||||
void* workSpace, size_t wkspSize,
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible)
|
||||
{
|
||||
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
||||
maxSymbolValue, huffLog, HUF_fourStreams,
|
||||
workSpace, wkspSize,
|
||||
hufTable, repeat, preferRepeat, bmi2);
|
||||
hufTable, repeat, preferRepeat, bmi2, suspectUncompressible);
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
@ -57,7 +57,7 @@ typedef struct {
|
||||
} ZSTD_localDict;
|
||||
|
||||
typedef struct {
|
||||
HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)];
|
||||
HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
|
||||
HUF_repeat repeatMode;
|
||||
} ZSTD_hufCTables_t;
|
||||
|
||||
@ -75,8 +75,55 @@ typedef struct {
|
||||
ZSTD_fseCTables_t fse;
|
||||
} ZSTD_entropyCTables_t;
|
||||
|
||||
/* *********************************************
|
||||
* Entropy buffer statistics structs and funcs *
|
||||
***********************************************/
|
||||
/* ZSTD_hufCTablesMetadata_t :
|
||||
* Stores Literals Block Type for a super-block in hType, and
|
||||
* huffman tree description in hufDesBuffer.
|
||||
* hufDesSize refers to the size of huffman tree description in bytes.
|
||||
* This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
|
||||
typedef struct {
|
||||
U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */
|
||||
symbolEncodingType_e hType;
|
||||
BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
|
||||
size_t hufDesSize;
|
||||
} ZSTD_hufCTablesMetadata_t;
|
||||
|
||||
/* ZSTD_fseCTablesMetadata_t :
|
||||
* Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
|
||||
* fse tables in fseTablesBuffer.
|
||||
* fseTablesSize refers to the size of fse tables in bytes.
|
||||
* This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
|
||||
typedef struct {
|
||||
symbolEncodingType_e llType;
|
||||
symbolEncodingType_e ofType;
|
||||
symbolEncodingType_e mlType;
|
||||
BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
|
||||
size_t fseTablesSize;
|
||||
size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
|
||||
} ZSTD_fseCTablesMetadata_t;
|
||||
|
||||
typedef struct {
|
||||
ZSTD_hufCTablesMetadata_t hufMetadata;
|
||||
ZSTD_fseCTablesMetadata_t fseMetadata;
|
||||
} ZSTD_entropyCTablesMetadata_t;
|
||||
|
||||
/* ZSTD_buildBlockEntropyStats() :
|
||||
* Builds entropy for the block.
|
||||
* @return : 0 on success or error code */
|
||||
size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
|
||||
const ZSTD_entropyCTables_t* prevEntropy,
|
||||
ZSTD_entropyCTables_t* nextEntropy,
|
||||
const ZSTD_CCtx_params* cctxParams,
|
||||
ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
||||
void* workspace, size_t wkspSize);
|
||||
|
||||
/* *******************************
|
||||
* Compression internals structs *
|
||||
*********************************/
|
||||
|
||||
typedef struct {
|
||||
U32 off; /* Offset sumtype code for the match, using ZSTD_storeSeq() format */
|
||||
U32 len; /* Raw length of match */
|
||||
} ZSTD_match_t;
|
||||
|
||||
@ -126,7 +173,7 @@ typedef struct {
|
||||
U32 offCodeSumBasePrice; /* to compare to log2(offreq) */
|
||||
ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */
|
||||
const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */
|
||||
ZSTD_literalCompressionMode_e literalCompressionMode;
|
||||
ZSTD_paramSwitch_e literalCompressionMode;
|
||||
} optState_t;
|
||||
|
||||
typedef struct {
|
||||
@ -135,14 +182,23 @@ typedef struct {
|
||||
} ZSTD_compressedBlockState_t;
|
||||
|
||||
typedef struct {
|
||||
BYTE const* nextSrc; /* next block here to continue on current prefix */
|
||||
BYTE const* base; /* All regular indexes relative to this position */
|
||||
BYTE const* dictBase; /* extDict indexes relative to this position */
|
||||
U32 dictLimit; /* below that point, need extDict */
|
||||
U32 lowLimit; /* below that point, no more valid data */
|
||||
BYTE const* nextSrc; /* next block here to continue on current prefix */
|
||||
BYTE const* base; /* All regular indexes relative to this position */
|
||||
BYTE const* dictBase; /* extDict indexes relative to this position */
|
||||
U32 dictLimit; /* below that point, need extDict */
|
||||
U32 lowLimit; /* below that point, no more valid data */
|
||||
U32 nbOverflowCorrections; /* Number of times overflow correction has run since
|
||||
* ZSTD_window_init(). Useful for debugging coredumps
|
||||
* and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
|
||||
*/
|
||||
} ZSTD_window_t;
|
||||
|
||||
#define ZSTD_WINDOW_START_INDEX 2
|
||||
|
||||
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
|
||||
|
||||
#define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */
|
||||
|
||||
struct ZSTD_matchState_t {
|
||||
ZSTD_window_t window; /* State for window round buffer management */
|
||||
U32 loadedDictEnd; /* index of end of dictionary, within context's referential.
|
||||
@ -154,9 +210,17 @@ struct ZSTD_matchState_t {
|
||||
*/
|
||||
U32 nextToUpdate; /* index from which to continue table update */
|
||||
U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
|
||||
|
||||
U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
|
||||
U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
|
||||
U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
|
||||
|
||||
U32* hashTable;
|
||||
U32* hashTable3;
|
||||
U32* chainTable;
|
||||
|
||||
U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */
|
||||
|
||||
int dedicatedDictSearch; /* Indicates whether this matchState is using the
|
||||
* dedicated dictionary search structure.
|
||||
*/
|
||||
@ -196,7 +260,7 @@ typedef struct {
|
||||
} ldmState_t;
|
||||
|
||||
typedef struct {
|
||||
U32 enableLdm; /* 1 if enable long distance matching */
|
||||
ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
|
||||
U32 hashLog; /* Log size of hashTable */
|
||||
U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */
|
||||
U32 minMatchLength; /* Minimum match length */
|
||||
@ -227,7 +291,7 @@ struct ZSTD_CCtx_params_s {
|
||||
* There is no guarantee that hint is close to actual source size */
|
||||
|
||||
ZSTD_dictAttachPref_e attachDictPref;
|
||||
ZSTD_literalCompressionMode_e literalCompressionMode;
|
||||
ZSTD_paramSwitch_e literalCompressionMode;
|
||||
|
||||
/* Multithreading: used to pass parameters to mtctx */
|
||||
int nbWorkers;
|
||||
@ -249,6 +313,15 @@ struct ZSTD_CCtx_params_s {
|
||||
ZSTD_sequenceFormat_e blockDelimiters;
|
||||
int validateSequences;
|
||||
|
||||
/* Block splitting */
|
||||
ZSTD_paramSwitch_e useBlockSplitter;
|
||||
|
||||
/* Param for deciding whether to use row-based matchfinder */
|
||||
ZSTD_paramSwitch_e useRowMatchFinder;
|
||||
|
||||
/* Always load a dictionary in ext-dict mode (not prefix mode)? */
|
||||
int deterministicRefPrefix;
|
||||
|
||||
/* Internal use, for createCCtxParams() and freeCCtxParams() only */
|
||||
ZSTD_customMem customMem;
|
||||
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
|
||||
@ -266,12 +339,29 @@ typedef enum {
|
||||
ZSTDb_buffered
|
||||
} ZSTD_buffered_policy_e;
|
||||
|
||||
/*
|
||||
* Struct that contains all elements of block splitter that should be allocated
|
||||
* in a wksp.
|
||||
*/
|
||||
#define ZSTD_MAX_NB_BLOCK_SPLITS 196
|
||||
typedef struct {
|
||||
seqStore_t fullSeqStoreChunk;
|
||||
seqStore_t firstHalfSeqStore;
|
||||
seqStore_t secondHalfSeqStore;
|
||||
seqStore_t currSeqStore;
|
||||
seqStore_t nextSeqStore;
|
||||
|
||||
U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
|
||||
ZSTD_entropyCTablesMetadata_t entropyMetadata;
|
||||
} ZSTD_blockSplitCtx;
|
||||
|
||||
struct ZSTD_CCtx_s {
|
||||
ZSTD_compressionStage_e stage;
|
||||
int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
|
||||
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
|
||||
ZSTD_CCtx_params requestedParams;
|
||||
ZSTD_CCtx_params appliedParams;
|
||||
ZSTD_CCtx_params simpleApiParams; /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
|
||||
U32 dictID;
|
||||
size_t dictContentSize;
|
||||
|
||||
@ -296,7 +386,7 @@ struct ZSTD_CCtx_s {
|
||||
ZSTD_blockState_t blockState;
|
||||
U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
|
||||
|
||||
/* Wether we are streaming or not */
|
||||
/* Whether we are streaming or not */
|
||||
ZSTD_buffered_policy_e bufferedPolicy;
|
||||
|
||||
/* streaming */
|
||||
@ -324,6 +414,9 @@ struct ZSTD_CCtx_s {
|
||||
/* Multi-threading */
|
||||
|
||||
/* Tracing */
|
||||
|
||||
/* Workspace for block splitter */
|
||||
ZSTD_blockSplitCtx blockSplitCtx;
|
||||
};
|
||||
|
||||
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
|
||||
@ -358,7 +451,7 @@ typedef enum {
|
||||
typedef size_t (*ZSTD_blockCompressor) (
|
||||
ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
|
||||
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
|
||||
|
||||
|
||||
MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
|
||||
@ -392,31 +485,6 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
|
||||
return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
|
||||
}
|
||||
|
||||
typedef struct repcodes_s {
|
||||
U32 rep[3];
|
||||
} repcodes_t;
|
||||
|
||||
MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
|
||||
{
|
||||
repcodes_t newReps;
|
||||
if (offset >= ZSTD_REP_NUM) { /* full offset */
|
||||
newReps.rep[2] = rep[1];
|
||||
newReps.rep[1] = rep[0];
|
||||
newReps.rep[0] = offset - ZSTD_REP_MOVE;
|
||||
} else { /* repcode */
|
||||
U32 const repCode = offset + ll0;
|
||||
if (repCode > 0) { /* note : if repCode==0, no change */
|
||||
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
|
||||
newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
|
||||
newReps.rep[1] = rep[0];
|
||||
newReps.rep[0] = currentOffset;
|
||||
} else { /* repCode == 0 */
|
||||
ZSTD_memcpy(&newReps, rep, sizeof(newReps));
|
||||
}
|
||||
}
|
||||
return newReps;
|
||||
}
|
||||
|
||||
/* ZSTD_cParam_withinBounds:
|
||||
* @return 1 if value is within cParam bounds,
|
||||
* 0 otherwise */
|
||||
@ -465,17 +533,17 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
|
||||
return (srcSize >> minlog) + 2;
|
||||
}
|
||||
|
||||
MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
|
||||
MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
|
||||
{
|
||||
switch (cctxParams->literalCompressionMode) {
|
||||
case ZSTD_lcm_huffman:
|
||||
case ZSTD_ps_enable:
|
||||
return 0;
|
||||
case ZSTD_lcm_uncompressed:
|
||||
case ZSTD_ps_disable:
|
||||
return 1;
|
||||
default:
|
||||
assert(0 /* impossible: pre-validated */);
|
||||
ZSTD_FALLTHROUGH;
|
||||
case ZSTD_lcm_auto:
|
||||
case ZSTD_ps_auto:
|
||||
return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
|
||||
}
|
||||
}
|
||||
@ -485,7 +553,9 @@ MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParam
|
||||
* Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
|
||||
* large copies.
|
||||
*/
|
||||
static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
|
||||
static void
|
||||
ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
|
||||
{
|
||||
assert(iend > ilimit_w);
|
||||
if (ip <= ilimit_w) {
|
||||
ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
|
||||
@ -495,14 +565,30 @@ static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const ie
|
||||
while (ip < iend) *op++ = *ip++;
|
||||
}
|
||||
|
#define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
#define STORE_REPCODE_1 STORE_REPCODE(1)
#define STORE_REPCODE_2 STORE_REPCODE(2)
#define STORE_REPCODE_3 STORE_REPCODE(3)
#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1)
#define STORE_OFFSET(o)  (assert((o)>0), o + ZSTD_REP_MOVE)
#define STORED_IS_OFFSET(o)  ((o) > ZSTD_REP_MOVE)
#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE)
#define STORED_OFFSET(o)  (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1)  /* returns ID 1,2,3 */
#define STORED_TO_OFFBASE(o) ((o)+1)
#define OFFBASE_TO_STORED(o) ((o)-1)
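A hedged editorial illustration of the stored sum-type defined above (the constants assume ZSTD_REP_NUM == 3, so ZSTD_REP_MOVE == 2): repcodes 1-3 are stored as 0-2, a real offset o is stored as o + 2, and the STORED_* macros recover which case a stored value was.

/* Editorial sketch of the offCode sum-type used by ZSTD_storeSeq(); values assume ZSTD_REP_NUM == 3. */
#include <assert.h>

#define REP_MOVE 2                                    /* ZSTD_REP_MOVE == ZSTD_REP_NUM - 1      */
#define STORE_REPCODE(r) ((unsigned)((r) - 1))        /* repcode 1..3 -> stored 0..2            */
#define STORE_OFFSET(o)  ((unsigned)((o) + REP_MOVE)) /* real offset o > 0 -> stored o + 2      */
#define IS_OFFSET(s)     ((s) > REP_MOVE)

int main(void)
{
    unsigned const s1 = STORE_REPCODE(2);   /* == 1   : "repeat offset #2"   */
    unsigned const s2 = STORE_OFFSET(100);  /* == 102 : a real offset of 100 */
    assert(!IS_OFFSET(s1) && s1 + 1 == 2);         /* STORED_REPCODE(s1) == 2 */
    assert(IS_OFFSET(s2) && s2 - REP_MOVE == 100); /* STORED_OFFSET(s2) == 100 */
    return 0;
}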
|
||||
/*! ZSTD_storeSeq() :
|
||||
* Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
|
||||
* `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
|
||||
* `mlBase` : matchLength - MINMATCH
|
||||
* Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
|
||||
* @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET().
|
||||
* @matchLength : must be >= MINMATCH
|
||||
* Allowed to overread literals up to litLimit.
|
||||
*/
|
||||
HINT_INLINE UNUSED_ATTR
|
||||
void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
|
||||
HINT_INLINE UNUSED_ATTR void
|
||||
ZSTD_storeSeq(seqStore_t* seqStorePtr,
|
||||
size_t litLength, const BYTE* literals, const BYTE* litLimit,
|
||||
U32 offBase_minus1,
|
||||
size_t matchLength)
|
||||
{
|
||||
BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
|
||||
BYTE const* const litEnd = literals + litLength;
|
||||
@ -511,7 +597,7 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
|
||||
if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
|
||||
{ U32 const pos = (U32)((const BYTE*)literals - g_start);
|
||||
DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
|
||||
pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
|
||||
pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);
|
||||
}
|
||||
#endif
|
||||
assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
|
||||
@ -535,26 +621,66 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
|
||||
|
||||
/* literal Length */
|
||||
if (litLength>0xFFFF) {
|
||||
assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
|
||||
seqStorePtr->longLengthID = 1;
|
||||
assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
|
||||
seqStorePtr->longLengthType = ZSTD_llt_literalLength;
|
||||
seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
||||
}
|
||||
seqStorePtr->sequences[0].litLength = (U16)litLength;
|
||||
|
||||
/* match offset */
|
||||
seqStorePtr->sequences[0].offset = offCode + 1;
|
||||
seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1);
|
||||
|
||||
/* match Length */
|
||||
if (mlBase>0xFFFF) {
|
||||
assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
|
||||
seqStorePtr->longLengthID = 2;
|
||||
seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
||||
assert(matchLength >= MINMATCH);
|
||||
{ size_t const mlBase = matchLength - MINMATCH;
|
||||
if (mlBase>0xFFFF) {
|
||||
assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
|
||||
seqStorePtr->longLengthType = ZSTD_llt_matchLength;
|
||||
seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
||||
}
|
||||
seqStorePtr->sequences[0].mlBase = (U16)mlBase;
|
||||
}
|
||||
seqStorePtr->sequences[0].matchLength = (U16)mlBase;
|
||||
|
||||
seqStorePtr->sequences++;
|
||||
}
|
||||
|
||||
/* ZSTD_updateRep() :
|
||||
* updates in-place @rep (array of repeat offsets)
|
||||
* @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq()
|
||||
*/
|
||||
MEM_STATIC void
|
||||
ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
|
||||
{
|
||||
if (STORED_IS_OFFSET(offBase_minus1)) { /* full offset */
|
||||
rep[2] = rep[1];
|
||||
rep[1] = rep[0];
|
||||
rep[0] = STORED_OFFSET(offBase_minus1);
|
||||
} else { /* repcode */
|
||||
U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0;
|
||||
if (repCode > 0) { /* note : if repCode==0, no change */
|
||||
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
|
||||
rep[2] = (repCode >= 2) ? rep[1] : rep[2];
|
||||
rep[1] = rep[0];
|
||||
rep[0] = currentOffset;
|
||||
} else { /* repCode == 0 */
|
||||
/* nothing to do */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct repcodes_s {
|
||||
U32 rep[3];
|
||||
} repcodes_t;
|
||||
|
||||
MEM_STATIC repcodes_t
|
||||
ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
|
||||
{
|
||||
repcodes_t newReps;
|
||||
ZSTD_memcpy(&newReps, rep, sizeof(newReps));
|
||||
ZSTD_updateRep(newReps.rep, offBase_minus1, ll0);
|
||||
return newReps;
|
||||
}
|
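A minimal editorial sketch (not the kernel code) of how the repeat-offset history evolves under the ZSTD_updateRep() rules above, using the same stored sum-type (repcode r stored as r - 1, real offset o stored as o + REP_MOVE):

/* Editorial sketch of repeat-offset updating, mirroring the ZSTD_updateRep() logic above. */
#include <assert.h>

typedef unsigned U32;
#define REP_NUM  3
#define REP_MOVE (REP_NUM - 1)

static void updateRep(U32 rep[REP_NUM], U32 storedOff, U32 ll0)
{
    if (storedOff > REP_MOVE) {                 /* a real offset */
        rep[2] = rep[1]; rep[1] = rep[0];
        rep[0] = storedOff - REP_MOVE;
    } else {                                    /* a repcode */
        U32 const repCode = storedOff + ll0;    /* == STORED_REPCODE(storedOff) - 1 + ll0 */
        if (repCode > 0) {
            U32 const cur = (repCode == REP_NUM) ? rep[0] - 1 : rep[repCode];
            rep[2] = (repCode >= 2) ? rep[1] : rep[2];
            rep[1] = rep[0];
            rep[0] = cur;
        } /* repCode == 0 : most recent offset reused, nothing to do */
    }
}

int main(void)
{
    U32 rep[REP_NUM] = {8, 4, 2};
    updateRep(rep, 16 + REP_MOVE, 0);            /* store a real offset of 16  */
    assert(rep[0] == 16 && rep[1] == 8 && rep[2] == 4);
    updateRep(rep, /* STORE_REPCODE(2) */ 1, 0); /* reuse repeat offset #2 (8) */
    assert(rep[0] == 8 && rep[1] == 16 && rep[2] == 4);
    return 0;
}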
||||
|
||||
|
||||
/*-*************************************
|
||||
* Match length counter
|
||||
@ -778,6 +904,13 @@ MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
|
||||
window->dictLimit = end;
|
||||
}
|
||||
|
||||
MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
|
||||
{
|
||||
return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
|
||||
window.lowLimit == ZSTD_WINDOW_START_INDEX &&
|
||||
(window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
|
||||
}
|
||||
|
||||
/*
|
||||
* ZSTD_window_hasExtDict():
|
||||
* Returns non-zero if the window has a non-empty extDict.
|
||||
@ -801,15 +934,71 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
|
||||
ZSTD_noDict;
|
||||
}
|
||||
|
/* Defining this macro to non-zero tells zstd to run the overflow correction
 * code much more frequently. This is very inefficient, and should only be
 * used for tests and fuzzers.
 */
#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
#  ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
#  else
#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
#  endif
#endif
||||
|
||||
/*
|
||||
* ZSTD_window_canOverflowCorrect():
|
||||
* Returns non-zero if the indices are large enough for overflow correction
|
||||
* to work correctly without impacting compression ratio.
|
||||
*/
|
||||
MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
|
||||
U32 cycleLog,
|
||||
U32 maxDist,
|
||||
U32 loadedDictEnd,
|
||||
void const* src)
|
||||
{
|
||||
U32 const cycleSize = 1u << cycleLog;
|
||||
U32 const curr = (U32)((BYTE const*)src - window.base);
|
||||
U32 const minIndexToOverflowCorrect = cycleSize
|
||||
+ MAX(maxDist, cycleSize)
|
||||
+ ZSTD_WINDOW_START_INDEX;
|
||||
|
||||
/* Adjust the min index to backoff the overflow correction frequency,
|
||||
* so we don't waste too much CPU in overflow correction. If this
|
||||
* computation overflows we don't really care, we just need to make
|
||||
* sure it is at least minIndexToOverflowCorrect.
|
||||
*/
|
||||
U32 const adjustment = window.nbOverflowCorrections + 1;
|
||||
U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
|
||||
minIndexToOverflowCorrect);
|
||||
U32 const indexLargeEnough = curr > adjustedIndex;
|
||||
|
||||
/* Only overflow correct early if the dictionary is invalidated already,
|
||||
* so we don't hurt compression ratio.
|
||||
*/
|
||||
U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;
|
||||
|
||||
return indexLargeEnough && dictionaryInvalidated;
|
||||
}
|
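Editorial worked example with assumed parameters (not from the patch): for cycleLog = 17 (a 128K-entry table cycle) and maxDist = 1 << 23 (an 8 MiB window), minIndexToOverflowCorrect = 2^17 + 2^23 + 2 = 8519682, roughly 8.5 million. Because the adjustment factor is nbOverflowCorrections + 1, the threshold roughly doubles after each correction, so the expensive index rewrite stays rare over the lifetime of a long stream.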
||||
|
||||
/*
* ZSTD_window_needOverflowCorrection():
* Returns non-zero if the indices are getting too large and need overflow
* protection.
*/
MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
U32 cycleLog,
U32 maxDist,
U32 loadedDictEnd,
void const* src,
void const* srcEnd)
{
U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
return 1;
}
}
return curr > ZSTD_CURRENT_MAX;
}

@ -821,7 +1010,6 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
*
* The least significant cycleLog bits of the indices must remain the same,
* which may be 0. Every index up to maxDist in the past must be valid.
* NOTE: (maxDist & cycleMask) must be zero.
*/
MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
U32 maxDist, void const* src)
@ -845,32 +1033,52 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
* 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
* windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
*/
U32 const cycleMask = (1U << cycleLog) - 1;
U32 const cycleSize = 1u << cycleLog;
U32 const cycleMask = cycleSize - 1;
U32 const curr = (U32)((BYTE const*)src - window->base);
U32 const currentCycle0 = curr & cycleMask;
/* Exclude zero so that newCurrent - maxDist >= 1. */
U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;
U32 const newCurrent = currentCycle1 + maxDist;
U32 const currentCycle = curr & cycleMask;
/* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
: 0;
U32 const newCurrent = currentCycle
+ currentCycleCorrection
+ MAX(maxDist, cycleSize);
U32 const correction = curr - newCurrent;
assert((maxDist & cycleMask) == 0);
/* maxDist must be a power of two so that:
* (newCurrent & cycleMask) == (curr & cycleMask)
* This is required to not corrupt the chains / binary tree.
*/
assert((maxDist & (maxDist - 1)) == 0);
assert((curr & cycleMask) == (newCurrent & cycleMask));
assert(curr > newCurrent);
/* Loose bound, should be around 1<<29 (see above) */
assert(correction > 1<<28);
if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
/* Loose bound, should be around 1<<29 (see above) */
assert(correction > 1<<28);
}

window->base += correction;
window->dictBase += correction;
if (window->lowLimit <= correction) window->lowLimit = 1;
else window->lowLimit -= correction;
if (window->dictLimit <= correction) window->dictLimit = 1;
else window->dictLimit -= correction;
if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
window->lowLimit = ZSTD_WINDOW_START_INDEX;
} else {
window->lowLimit -= correction;
}
if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
window->dictLimit = ZSTD_WINDOW_START_INDEX;
} else {
window->dictLimit -= correction;
}

/* Ensure we can still reference the full window. */
assert(newCurrent >= maxDist);
assert(newCurrent - maxDist >= 1);
assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
/* Ensure that lowLimit and dictLimit didn't underflow. */
assert(window->lowLimit <= newCurrent);
assert(window->dictLimit <= newCurrent);

++window->nbOverflowCorrections;

DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
window->lowLimit);
return correction;
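A worked standalone example of the same re-basing arithmetic, with made-up index values (again assuming ZSTD_WINDOW_START_INDEX == 2 for illustration):

#include <stdio.h>
#include <stdint.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define WINDOW_START_INDEX 2u   /* assumed stand-in for ZSTD_WINDOW_START_INDEX */

int main(void)
{
    uint32_t const cycleLog  = 20;                /* 1 MiB cycle, illustrative */
    uint32_t const cycleSize = 1u << cycleLog;
    uint32_t const cycleMask = cycleSize - 1;
    uint32_t const maxDist   = 8u << 20;          /* 8 MiB window, a power of two */
    uint32_t const curr      = 0xC0000123u;       /* a large index nearing overflow */

    uint32_t const currentCycle = curr & cycleMask;
    uint32_t const cycleCorrection = currentCycle < WINDOW_START_INDEX
                                   ? MAX(cycleSize, WINDOW_START_INDEX) : 0;
    uint32_t const newCurrent = currentCycle + cycleCorrection + MAX(maxDist, cycleSize);
    uint32_t const correction = curr - newCurrent;

    /* the low cycleLog bits are unchanged, so hash/chain entries stay consistent
     * once every stored index is reduced by `correction` */
    printf("newCurrent=0x%x correction=0x%x lowBitsPreserved=%d\n",
           (unsigned)newCurrent, (unsigned)correction,
           (curr & cycleMask) == (newCurrent & cycleMask));
    return 0;
}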
@ -975,11 +1183,13 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,

MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
ZSTD_memset(window, 0, sizeof(*window));
window->base = (BYTE const*)"";
window->dictBase = (BYTE const*)"";
window->dictLimit = 1; /* start from 1, so that 1st position is valid */
window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */
window->nextSrc = window->base + 1; /* see issue #1241 */
window->base = (BYTE const*)" ";
window->dictBase = (BYTE const*)" ";
ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
window->dictLimit = ZSTD_WINDOW_START_INDEX; /* start from >0, so that 1st position is valid */
window->lowLimit = ZSTD_WINDOW_START_INDEX; /* it ensures first and later CCtx usages compress the same */
window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX; /* see issue #1241 */
window->nbOverflowCorrections = 0;
}

/*
@ -990,7 +1200,8 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
* Returns non-zero if the segment is contiguous.
*/
MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
void const* src, size_t srcSize)
void const* src, size_t srcSize,
int forceNonContiguous)
{
BYTE const* const ip = (BYTE const*)src;
U32 contiguous = 1;
@ -1000,7 +1211,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
assert(window->base != NULL);
assert(window->dictBase != NULL);
/* Check if blocks follow each other */
if (src != window->nextSrc) {
if (src != window->nextSrc || forceNonContiguous) {
/* not contiguous */
size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
@ -1030,15 +1241,15 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
*/
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
{
U32 const maxDistance = 1U << windowLog;
U32 const lowestValid = ms->window.lowLimit;
U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
U32 const isDictionary = (ms->loadedDictEnd != 0);
U32 const maxDistance = 1U << windowLog;
U32 const lowestValid = ms->window.lowLimit;
U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
U32 const isDictionary = (ms->loadedDictEnd != 0);
/* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
* is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
* valid for the entire block. So this check is sufficient to find the lowest valid match index.
*/
U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
return matchLowest;
}

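A small standalone sketch of the lowest-valid-index selection above, with plain integers in place of the matchState fields (all values are illustrative):

#include <stdio.h>
#include <stdint.h>

static uint32_t lowest_match_index(uint32_t curr, uint32_t lowLimit,
                                   uint32_t loadedDictEnd, unsigned windowLog)
{
    uint32_t const maxDistance = 1u << windowLog;
    uint32_t const withinWindow =
        (curr - lowLimit > maxDistance) ? curr - maxDistance : lowLimit;
    uint32_t const isDictionary = (loadedDictEnd != 0);
    /* with a dictionary attached and still valid, everything down to lowLimit stays referencable */
    return isDictionary ? lowLimit : withinWindow;
}

int main(void)
{
    /* no dictionary: limited to one window (1 MiB here) behind curr */
    printf("%u\n", (unsigned)lowest_match_index(5000000, 2, 0, 20));     /* 3951424 */
    /* dictionary loaded and still valid: the whole range from lowLimit is usable */
    printf("%u\n", (unsigned)lowest_match_index(5000000, 2, 70000, 20)); /* 2 */
    return 0;
}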
|
@ -73,7 +73,8 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
void* entropyWorkspace, size_t entropyWorkspaceSize,
|
||||
const int bmi2)
|
||||
const int bmi2,
|
||||
unsigned suspectUncompressible)
|
||||
{
|
||||
size_t const minGain = ZSTD_minGain(srcSize, strategy);
|
||||
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
|
||||
@ -105,11 +106,11 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
||||
HUF_compress1X_repeat(
|
||||
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
|
||||
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
|
||||
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
|
||||
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible) :
|
||||
HUF_compress4X_repeat(
|
||||
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
|
||||
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
|
||||
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
|
||||
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible);
|
||||
if (repeat != HUF_repeat_none) {
|
||||
/* reused the existing table */
|
||||
DEBUGLOG(5, "Reusing previous huffman table");
|
||||
@ -117,7 +118,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
||||
}
|
||||
}
|
||||
|
||||
if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
|
||||
if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
|
||||
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
||||
}
|
||||
|
@ -18,12 +18,14 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
|
||||
|
||||
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
||||
|
||||
/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
|
||||
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
||||
ZSTD_hufCTables_t* nextHuf,
|
||||
ZSTD_strategy strategy, int disableLiteralCompression,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
void* entropyWorkspace, size_t entropyWorkspaceSize,
|
||||
const int bmi2);
|
||||
const int bmi2,
|
||||
unsigned suspectUncompressible);
|
||||
|
||||
#endif /* ZSTD_COMPRESS_LITERALS_H */
|
||||
|
@ -85,6 +85,8 @@ static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t
|
||||
{
|
||||
unsigned cost = 0;
|
||||
unsigned s;
|
||||
|
||||
assert(total > 0);
|
||||
for (s = 0; s <= max; ++s) {
|
||||
unsigned norm = (unsigned)((256 * count[s]) / total);
|
||||
if (count[s] != 0 && norm == 0)
|
||||
@ -273,10 +275,11 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
||||
assert(nbSeq_1 > 1);
|
||||
assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
|
||||
(void)entropyWorkspaceSize;
|
||||
FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
|
||||
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog); /* overflow protected */
|
||||
FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "FSE_normalizeCount failed");
|
||||
assert(oend >= op);
|
||||
{ size_t const NCountSize = FSE_writeNCount(op, (size_t)(oend - op), wksp->norm, max, tableLog); /* overflow protected */
|
||||
FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
|
||||
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "");
|
||||
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "FSE_buildCTable_wksp failed");
|
||||
return NCountSize;
|
||||
}
|
||||
}
|
||||
@ -310,19 +313,19 @@ ZSTD_encodeSequences_body(
|
||||
FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
|
||||
if (MEM_32bits()) BIT_flushBits(&blockStream);
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
|
||||
if (MEM_32bits()) BIT_flushBits(&blockStream);
|
||||
if (longOffsets) {
|
||||
U32 const ofBits = ofCodeTable[nbSeq-1];
|
||||
unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
|
||||
if (extraBits) {
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, extraBits);
|
||||
BIT_flushBits(&blockStream);
|
||||
}
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase >> extraBits,
|
||||
ofBits - extraBits);
|
||||
} else {
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
|
||||
}
|
||||
BIT_flushBits(&blockStream);
|
||||
|
||||
@ -336,8 +339,8 @@ ZSTD_encodeSequences_body(
|
||||
U32 const mlBits = ML_bits[mlCode];
|
||||
DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
|
||||
(unsigned)sequences[n].litLength,
|
||||
(unsigned)sequences[n].matchLength + MINMATCH,
|
||||
(unsigned)sequences[n].offset);
|
||||
(unsigned)sequences[n].mlBase + MINMATCH,
|
||||
(unsigned)sequences[n].offBase);
|
||||
/* 32b*/ /* 64b*/
|
||||
/* (7)*/ /* (7)*/
|
||||
FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
|
||||
@ -348,18 +351,18 @@ ZSTD_encodeSequences_body(
|
||||
BIT_flushBits(&blockStream); /* (7)*/
|
||||
BIT_addBits(&blockStream, sequences[n].litLength, llBits);
|
||||
if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
|
||||
BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
|
||||
BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
|
||||
if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
|
||||
if (longOffsets) {
|
||||
unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
|
||||
if (extraBits) {
|
||||
BIT_addBits(&blockStream, sequences[n].offset, extraBits);
|
||||
BIT_addBits(&blockStream, sequences[n].offBase, extraBits);
|
||||
BIT_flushBits(&blockStream); /* (7)*/
|
||||
}
|
||||
BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
|
||||
BIT_addBits(&blockStream, sequences[n].offBase >> extraBits,
|
||||
ofBits - extraBits); /* 31 */
|
||||
} else {
|
||||
BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
|
||||
BIT_addBits(&blockStream, sequences[n].offBase, ofBits); /* 31 */
|
||||
}
|
||||
BIT_flushBits(&blockStream); /* (7)*/
|
||||
DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
|
||||
@ -396,7 +399,7 @@ ZSTD_encodeSequences_default(
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
|
||||
static TARGET_ATTRIBUTE("bmi2") size_t
|
||||
static BMI2_TARGET_ATTRIBUTE size_t
|
||||
ZSTD_encodeSequences_bmi2(
|
||||
void* dst, size_t dstCapacity,
|
||||
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
||||
|
@ -15,289 +15,10 @@
|
||||
|
||||
#include "../common/zstd_internal.h" /* ZSTD_getSequenceLength */
|
||||
#include "hist.h" /* HIST_countFast_wksp */
|
||||
#include "zstd_compress_internal.h"
|
||||
#include "zstd_compress_internal.h" /* ZSTD_[huf|fse|entropy]CTablesMetadata_t */
|
||||
#include "zstd_compress_sequences.h"
|
||||
#include "zstd_compress_literals.h"
|
||||
|
||||
/*-*************************************
|
||||
* Superblock entropy buffer structs
|
||||
***************************************/
|
||||
/* ZSTD_hufCTablesMetadata_t :
|
||||
* Stores Literals Block Type for a super-block in hType, and
|
||||
* huffman tree description in hufDesBuffer.
|
||||
* hufDesSize refers to the size of huffman tree description in bytes.
|
||||
* This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
|
||||
typedef struct {
|
||||
symbolEncodingType_e hType;
|
||||
BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
|
||||
size_t hufDesSize;
|
||||
} ZSTD_hufCTablesMetadata_t;
|
||||
|
||||
/* ZSTD_fseCTablesMetadata_t :
|
||||
* Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
|
||||
* fse tables in fseTablesBuffer.
|
||||
* fseTablesSize refers to the size of fse tables in bytes.
|
||||
* This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
|
||||
typedef struct {
|
||||
symbolEncodingType_e llType;
|
||||
symbolEncodingType_e ofType;
|
||||
symbolEncodingType_e mlType;
|
||||
BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
|
||||
size_t fseTablesSize;
|
||||
size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
|
||||
} ZSTD_fseCTablesMetadata_t;
|
||||
|
||||
typedef struct {
|
||||
ZSTD_hufCTablesMetadata_t hufMetadata;
|
||||
ZSTD_fseCTablesMetadata_t fseMetadata;
|
||||
} ZSTD_entropyCTablesMetadata_t;
|
||||
|
||||
|
||||
/* ZSTD_buildSuperBlockEntropy_literal() :
|
||||
* Builds entropy for the super-block literals.
|
||||
* Stores literals block type (raw, rle, compressed, repeat) and
|
||||
* huffman description table to hufMetadata.
|
||||
* @return : size of huffman description table or error code */
|
||||
static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
|
||||
const ZSTD_hufCTables_t* prevHuf,
|
||||
ZSTD_hufCTables_t* nextHuf,
|
||||
ZSTD_hufCTablesMetadata_t* hufMetadata,
|
||||
const int disableLiteralsCompression,
|
||||
void* workspace, size_t wkspSize)
|
||||
{
|
||||
BYTE* const wkspStart = (BYTE*)workspace;
|
||||
BYTE* const wkspEnd = wkspStart + wkspSize;
|
||||
BYTE* const countWkspStart = wkspStart;
|
||||
unsigned* const countWksp = (unsigned*)workspace;
|
||||
const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
|
||||
BYTE* const nodeWksp = countWkspStart + countWkspSize;
|
||||
const size_t nodeWkspSize = wkspEnd-nodeWksp;
|
||||
unsigned maxSymbolValue = 255;
|
||||
unsigned huffLog = HUF_TABLELOG_DEFAULT;
|
||||
HUF_repeat repeat = prevHuf->repeatMode;
|
||||
|
||||
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
|
||||
|
||||
/* Prepare nextEntropy assuming reusing the existing table */
|
||||
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
|
||||
if (disableLiteralsCompression) {
|
||||
DEBUGLOG(5, "set_basic - disabled");
|
||||
hufMetadata->hType = set_basic;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* small ? don't even attempt compression (speed opt) */
|
||||
# define COMPRESS_LITERALS_SIZE_MIN 63
|
||||
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
|
||||
if (srcSize <= minLitSize) {
|
||||
DEBUGLOG(5, "set_basic - too small");
|
||||
hufMetadata->hType = set_basic;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Scan input and build symbol stats */
|
||||
{ size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
|
||||
FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
|
||||
if (largest == srcSize) {
|
||||
DEBUGLOG(5, "set_rle");
|
||||
hufMetadata->hType = set_rle;
|
||||
return 0;
|
||||
}
|
||||
if (largest <= (srcSize >> 7)+4) {
|
||||
DEBUGLOG(5, "set_basic - no gain");
|
||||
hufMetadata->hType = set_basic;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Validate the previous Huffman table */
|
||||
if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
|
||||
repeat = HUF_repeat_none;
|
||||
}
|
||||
|
||||
/* Build Huffman Tree */
|
||||
ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
|
||||
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
|
||||
{ size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
|
||||
maxSymbolValue, huffLog,
|
||||
nodeWksp, nodeWkspSize);
|
||||
FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
|
||||
huffLog = (U32)maxBits;
|
||||
{ /* Build and write the CTable */
|
||||
size_t const newCSize = HUF_estimateCompressedSize(
|
||||
(HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
|
||||
size_t const hSize = HUF_writeCTable_wksp(
|
||||
hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
|
||||
(HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
|
||||
nodeWksp, nodeWkspSize);
|
||||
/* Check against repeating the previous CTable */
|
||||
if (repeat != HUF_repeat_none) {
|
||||
size_t const oldCSize = HUF_estimateCompressedSize(
|
||||
(HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
|
||||
if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
|
||||
DEBUGLOG(5, "set_repeat - smaller");
|
||||
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
hufMetadata->hType = set_repeat;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (newCSize + hSize >= srcSize) {
|
||||
DEBUGLOG(5, "set_basic - no gains");
|
||||
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
hufMetadata->hType = set_basic;
|
||||
return 0;
|
||||
}
|
||||
DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
|
||||
hufMetadata->hType = set_compressed;
|
||||
nextHuf->repeatMode = HUF_repeat_check;
|
||||
return hSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ZSTD_buildSuperBlockEntropy_sequences() :
|
||||
* Builds entropy for the super-block sequences.
|
||||
* Stores symbol compression modes and fse table to fseMetadata.
|
||||
* @return : size of fse tables or error code */
|
||||
static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
|
||||
const ZSTD_fseCTables_t* prevEntropy,
|
||||
ZSTD_fseCTables_t* nextEntropy,
|
||||
const ZSTD_CCtx_params* cctxParams,
|
||||
ZSTD_fseCTablesMetadata_t* fseMetadata,
|
||||
void* workspace, size_t wkspSize)
|
||||
{
|
||||
BYTE* const wkspStart = (BYTE*)workspace;
|
||||
BYTE* const wkspEnd = wkspStart + wkspSize;
|
||||
BYTE* const countWkspStart = wkspStart;
|
||||
unsigned* const countWksp = (unsigned*)workspace;
|
||||
const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
|
||||
BYTE* const cTableWksp = countWkspStart + countWkspSize;
|
||||
const size_t cTableWkspSize = wkspEnd-cTableWksp;
|
||||
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
|
||||
FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
|
||||
FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
|
||||
FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
|
||||
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
|
||||
const BYTE* const llCodeTable = seqStorePtr->llCode;
|
||||
const BYTE* const mlCodeTable = seqStorePtr->mlCode;
|
||||
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
||||
BYTE* const ostart = fseMetadata->fseTablesBuffer;
|
||||
BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
|
||||
BYTE* op = ostart;
|
||||
|
||||
assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
|
||||
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
|
||||
ZSTD_memset(workspace, 0, wkspSize);
|
||||
|
||||
fseMetadata->lastCountSize = 0;
|
||||
/* convert length/distances into codes */
|
||||
ZSTD_seqToCodes(seqStorePtr);
|
||||
/* build CTable for Literal Lengths */
|
||||
{ U32 LLtype;
|
||||
unsigned max = MaxLL;
|
||||
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
||||
DEBUGLOG(5, "Building LL table");
|
||||
nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
|
||||
LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
|
||||
countWksp, max, mostFrequent, nbSeq,
|
||||
LLFSELog, prevEntropy->litlengthCTable,
|
||||
LL_defaultNorm, LL_defaultNormLog,
|
||||
ZSTD_defaultAllowed, strategy);
|
||||
assert(set_basic < set_compressed && set_rle < set_compressed);
|
||||
assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
|
||||
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
|
||||
countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
|
||||
prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
|
||||
cTableWksp, cTableWkspSize);
|
||||
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
|
||||
if (LLtype == set_compressed)
|
||||
fseMetadata->lastCountSize = countSize;
|
||||
op += countSize;
|
||||
fseMetadata->llType = (symbolEncodingType_e) LLtype;
|
||||
} }
|
||||
/* build CTable for Offsets */
|
||||
{ U32 Offtype;
|
||||
unsigned max = MaxOff;
|
||||
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
||||
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
|
||||
ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
|
||||
DEBUGLOG(5, "Building OF table");
|
||||
nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
|
||||
Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
|
||||
countWksp, max, mostFrequent, nbSeq,
|
||||
OffFSELog, prevEntropy->offcodeCTable,
|
||||
OF_defaultNorm, OF_defaultNormLog,
|
||||
defaultPolicy, strategy);
|
||||
assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
|
||||
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
|
||||
countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
|
||||
prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
|
||||
cTableWksp, cTableWkspSize);
|
||||
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
|
||||
if (Offtype == set_compressed)
|
||||
fseMetadata->lastCountSize = countSize;
|
||||
op += countSize;
|
||||
fseMetadata->ofType = (symbolEncodingType_e) Offtype;
|
||||
} }
|
||||
/* build CTable for MatchLengths */
|
||||
{ U32 MLtype;
|
||||
unsigned max = MaxML;
|
||||
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
||||
DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
|
||||
nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
|
||||
MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
|
||||
countWksp, max, mostFrequent, nbSeq,
|
||||
MLFSELog, prevEntropy->matchlengthCTable,
|
||||
ML_defaultNorm, ML_defaultNormLog,
|
||||
ZSTD_defaultAllowed, strategy);
|
||||
assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
|
||||
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
|
||||
countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
|
||||
prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
|
||||
cTableWksp, cTableWkspSize);
|
||||
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
|
||||
if (MLtype == set_compressed)
|
||||
fseMetadata->lastCountSize = countSize;
|
||||
op += countSize;
|
||||
fseMetadata->mlType = (symbolEncodingType_e) MLtype;
|
||||
} }
|
||||
assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
|
||||
return op-ostart;
|
||||
}
|
||||
|
||||
|
||||
/* ZSTD_buildSuperBlockEntropy() :
|
||||
* Builds entropy for the super-block.
|
||||
* @return : 0 on success or error code */
|
||||
static size_t
|
||||
ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
|
||||
const ZSTD_entropyCTables_t* prevEntropy,
|
||||
ZSTD_entropyCTables_t* nextEntropy,
|
||||
const ZSTD_CCtx_params* cctxParams,
|
||||
ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
||||
void* workspace, size_t wkspSize)
|
||||
{
|
||||
size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
|
||||
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
|
||||
entropyMetadata->hufMetadata.hufDesSize =
|
||||
ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
|
||||
&prevEntropy->huf, &nextEntropy->huf,
|
||||
&entropyMetadata->hufMetadata,
|
||||
ZSTD_disableLiteralsCompression(cctxParams),
|
||||
workspace, wkspSize);
|
||||
FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
|
||||
entropyMetadata->fseMetadata.fseTablesSize =
|
||||
ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
|
||||
&prevEntropy->fse, &nextEntropy->fse,
|
||||
cctxParams,
|
||||
&entropyMetadata->fseMetadata,
|
||||
workspace, wkspSize);
|
||||
FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ZSTD_compressSubBlock_literal() :
|
||||
* Compresses literals section for a sub-block.
|
||||
* When we have to write the Huffman table we will sometimes choose a header
|
||||
@ -411,8 +132,7 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef*
|
||||
const seqDef* sp = sstart;
|
||||
size_t matchLengthSum = 0;
|
||||
size_t litLengthSum = 0;
|
||||
/* Only used by assert(), suppress unused variable warnings in production. */
|
||||
(void)litLengthSum;
|
||||
(void)(litLengthSum); /* suppress unused variable warning on some environments */
|
||||
while (send-sp > 0) {
|
||||
ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
|
||||
litLengthSum += seqLen.litLength;
|
||||
@ -605,7 +325,7 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit
|
||||
static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
|
||||
const BYTE* codeTable, unsigned maxCode,
|
||||
size_t nbSeq, const FSE_CTable* fseCTable,
|
||||
const U32* additionalBits,
|
||||
const U8* additionalBits,
|
||||
short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
|
||||
void* workspace, size_t wkspSize)
|
||||
{
|
||||
@ -646,8 +366,9 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
|
||||
void* workspace, size_t wkspSize,
|
||||
int writeEntropy)
|
||||
{
|
||||
size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
|
||||
size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
|
||||
size_t cSeqSizeEstimate = 0;
|
||||
if (nbSeq == 0) return sequencesSectionHeaderSize;
|
||||
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
|
||||
nbSeq, fseTables->offcodeCTable, NULL,
|
||||
OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
|
||||
@ -754,7 +475,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
||||
/* I think there is an optimization opportunity here.
|
||||
* Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
|
||||
* since it recalculates estimate from scratch.
|
||||
* For example, it would recount literal distribution and symbol codes everytime.
|
||||
* For example, it would recount literal distribution and symbol codes every time.
|
||||
*/
|
||||
cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
|
||||
&nextCBlock->entropy, entropyMetadata,
|
||||
@ -818,7 +539,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
||||
repcodes_t rep;
|
||||
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
|
||||
for (seq = sstart; seq < sp; ++seq) {
|
||||
rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
|
||||
ZSTD_updateRep(rep.rep, seq->offBase - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
|
||||
}
|
||||
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
|
||||
}
|
||||
@ -833,7 +554,7 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
|
||||
unsigned lastBlock) {
|
||||
ZSTD_entropyCTablesMetadata_t entropyMetadata;
|
||||
|
||||
FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore,
|
||||
FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
|
||||
&zc->blockState.prevCBlock->entropy,
|
||||
&zc->blockState.nextCBlock->entropy,
|
||||
&zc->appliedParams,
|
||||
|
@ -32,6 +32,10 @@
|
||||
#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
|
||||
#endif
|
||||
|
||||
|
||||
/* Set our tables and aligneds to align by 64 bytes */
|
||||
#define ZSTD_CWKSP_ALIGNMENT_BYTES 64
|
||||
|
||||
/*-*************************************
|
||||
* Structures
|
||||
***************************************/
|
||||
@ -114,10 +118,11 @@ typedef enum {
* - Tables: these are any of several different datastructures (hash tables,
* chain tables, binary trees) that all respect a common format: they are
* uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
* Their sizes depend on the cparams.
* Their sizes depend on the cparams. These tables are 64-byte aligned.
*
* - Aligned: these buffers are used for various purposes that require 4 byte
* alignment, but don't require any initialization before they're used.
* alignment, but don't require any initialization before they're used. These
* buffers are each aligned to 64 bytes.
*
* - Buffers: these buffers are used for various purposes that don't require
* any alignment or initialization before they're used. This means they can
@ -130,8 +135,7 @@ typedef enum {
*
* 1. Objects
* 2. Buffers
* 3. Aligned
* 4. Tables
* 3. Aligned/Tables
*
* Attempts to reserve objects of different types out of order will fail.
*/
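A tiny standalone sketch of that ordering rule, using illustrative names rather than the real cwksp API: allocation phases may only move forward.

#include <stdio.h>

typedef enum { alloc_objects = 0, alloc_buffers = 1, alloc_aligned = 2 } phase_e;

static int advance_phase(phase_e* current, phase_e requested)
{
    if (requested < *current) return -1;   /* out-of-order request: allocation fails */
    *current = requested;                  /* moving forward (or staying put) is fine */
    return 0;
}

int main(void)
{
    phase_e phase = alloc_objects;
    printf("%d\n", advance_phase(&phase, alloc_buffers));  /*  0 */
    printf("%d\n", advance_phase(&phase, alloc_aligned));  /*  0 */
    printf("%d\n", advance_phase(&phase, alloc_buffers));  /* -1: buffers after aligned/tables */
    return 0;
}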
@ -184,6 +188,8 @@ MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
|
||||
* Since tables aren't currently redzoned, you don't need to call through this
|
||||
* to figure out how much space you need for the matchState tables. Everything
|
||||
* else is though.
|
||||
*
|
||||
* Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size().
|
||||
*/
|
||||
MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
|
||||
if (size == 0)
|
||||
@ -191,53 +197,55 @@ MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
|
||||
return size;
|
||||
}
|
||||
|
||||
MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
|
||||
ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
|
||||
assert(phase >= ws->phase);
|
||||
if (phase > ws->phase) {
|
||||
if (ws->phase < ZSTD_cwksp_alloc_buffers &&
|
||||
phase >= ZSTD_cwksp_alloc_buffers) {
|
||||
ws->tableValidEnd = ws->objectEnd;
|
||||
}
|
||||
if (ws->phase < ZSTD_cwksp_alloc_aligned &&
|
||||
phase >= ZSTD_cwksp_alloc_aligned) {
|
||||
/* If unaligned allocations down from a too-large top have left us
|
||||
* unaligned, we need to realign our alloc ptr. Technically, this
|
||||
* can consume space that is unaccounted for in the neededSpace
|
||||
* calculation. However, I believe this can only happen when the
|
||||
* workspace is too large, and specifically when it is too large
|
||||
* by a larger margin than the space that will be consumed. */
|
||||
/* TODO: cleaner, compiler warning friendly way to do this??? */
|
||||
ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
|
||||
if (ws->allocStart < ws->tableValidEnd) {
|
||||
ws->tableValidEnd = ws->allocStart;
|
||||
}
|
||||
}
|
||||
ws->phase = phase;
|
||||
}
|
||||
/*
* Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes.
* Used to determine the number of bytes required for a given "aligned".
*/
MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES));
}

/*
* Returns whether this object/buffer/etc was allocated in this workspace.
* Returns the amount of additional space the cwksp must allocate
* for internal purposes (currently only alignment).
*/
MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) {
return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
/* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes
* to align the beginning of tables section, as well as another n_2=[0, 63] bytes
* to align the beginning of the aligned section.
*
* n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and
* aligneds being sized in multiples of 64 bytes.
*/
size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES;
return slackSpace;
}


/*
* Return the number of additional bytes required to align a pointer to the given number of bytes.
* alignBytes must be a power of two.
*/
MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignBytes) {
size_t const alignBytesMask = alignBytes - 1;
size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
assert((alignBytes & alignBytesMask) == 0);
assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES);
return bytes;
}

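A standalone sketch of the padding computation above, also illustrating the n_1 + n_2 == 64 observation from the slack-space comment (the positions inside the buffer and the 64-byte constant are illustrative):

#include <stdio.h>
#include <stddef.h>

#define CWKSP_ALIGNMENT_BYTES 64   /* mirrors ZSTD_CWKSP_ALIGNMENT_BYTES */

static size_t bytes_to_align_ptr(void* ptr, size_t const alignBytes)
{
    size_t const mask = alignBytes - 1;
    return (alignBytes - ((size_t)ptr & mask)) & mask;   /* result in [0, alignBytes-1] */
}

int main(void)
{
    char wksp[512];
    void* objectEnd  = wksp + 24;          /* illustrative positions sharing the same */
    void* allocStart = wksp + 24 + 128;    /* 64-byte residue, as in a fresh cwksp    */

    size_t const n2 = bytes_to_align_ptr(objectEnd, CWKSP_ALIGNMENT_BYTES);                           /* [0, 63] */
    size_t const n1 = CWKSP_ALIGNMENT_BYTES - bytes_to_align_ptr(allocStart, CWKSP_ALIGNMENT_BYTES);  /* [1, 64] */

    printf("n1=%zu n2=%zu n1+n2=%zu\n", n1, n2, n1 + n2);   /* n1+n2 == 64 here */
    return 0;
}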
/*
|
||||
* Internal function. Do not use directly.
|
||||
* Reserves the given number of bytes within the aligned/buffer segment of the wksp,
|
||||
* which counts from the end of the wksp (as opposed to the object/table segment).
|
||||
*
|
||||
* Returns a pointer to the beginning of that space.
|
||||
*/
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_internal(
|
||||
ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
|
||||
void* alloc;
|
||||
void* bottom = ws->tableEnd;
|
||||
ZSTD_cwksp_internal_advance_phase(ws, phase);
|
||||
alloc = (BYTE *)ws->allocStart - bytes;
|
||||
|
||||
if (bytes == 0)
|
||||
return NULL;
|
||||
|
||||
|
||||
MEM_STATIC void*
|
||||
ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes)
|
||||
{
|
||||
void* const alloc = (BYTE*)ws->allocStart - bytes;
|
||||
void* const bottom = ws->tableEnd;
|
||||
DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
|
||||
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
|
||||
ZSTD_cwksp_assert_internal_consistency(ws);
|
||||
@ -247,10 +255,81 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal(
|
||||
ws->allocFailed = 1;
|
||||
return NULL;
|
||||
}
|
||||
/* the area is reserved from the end of wksp.
|
||||
* If it overlaps with tableValidEnd, it voids guarantees on values' range */
|
||||
if (alloc < ws->tableValidEnd) {
|
||||
ws->tableValidEnd = alloc;
|
||||
}
|
||||
ws->allocStart = alloc;
|
||||
return alloc;
|
||||
}
|
||||
|
||||
/*
|
||||
* Moves the cwksp to the next phase, and does any necessary allocations.
|
||||
* cwksp initialization must necessarily go through each phase in order.
|
||||
* Returns a 0 on success, or zstd error
|
||||
*/
|
||||
MEM_STATIC size_t
|
||||
ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase)
|
||||
{
|
||||
assert(phase >= ws->phase);
|
||||
if (phase > ws->phase) {
|
||||
/* Going from allocating objects to allocating buffers */
|
||||
if (ws->phase < ZSTD_cwksp_alloc_buffers &&
|
||||
phase >= ZSTD_cwksp_alloc_buffers) {
|
||||
ws->tableValidEnd = ws->objectEnd;
|
||||
}
|
||||
|
||||
/* Going from allocating buffers to allocating aligneds/tables */
|
||||
if (ws->phase < ZSTD_cwksp_alloc_aligned &&
|
||||
phase >= ZSTD_cwksp_alloc_aligned) {
|
||||
{ /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */
|
||||
size_t const bytesToAlign =
|
||||
ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES);
|
||||
DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign);
|
||||
ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */
|
||||
RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign),
|
||||
memory_allocation, "aligned phase - alignment initial allocation failed!");
|
||||
}
|
||||
{ /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
|
||||
void* const alloc = ws->objectEnd;
|
||||
size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
|
||||
void* const objectEnd = (BYTE*)alloc + bytesToAlign;
|
||||
DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
|
||||
RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation,
|
||||
"table phase - alignment initial allocation failed!");
|
||||
ws->objectEnd = objectEnd;
|
||||
ws->tableEnd = objectEnd; /* table area starts being empty */
|
||||
if (ws->tableValidEnd < ws->tableEnd) {
|
||||
ws->tableValidEnd = ws->tableEnd;
|
||||
} } }
|
||||
ws->phase = phase;
|
||||
ZSTD_cwksp_assert_internal_consistency(ws);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns whether this object/buffer/etc was allocated in this workspace.
|
||||
*/
|
||||
MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr)
|
||||
{
|
||||
return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Internal function. Do not use directly.
|
||||
*/
|
||||
MEM_STATIC void*
|
||||
ZSTD_cwksp_reserve_internal(ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase)
|
||||
{
|
||||
void* alloc;
|
||||
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase)) || bytes == 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
alloc = ZSTD_cwksp_reserve_internal_buffer_space(ws, bytes);
|
||||
|
||||
|
||||
return alloc;
|
||||
@ -259,33 +338,44 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal(
|
||||
/*
|
||||
* Reserves and returns unaligned memory.
|
||||
*/
|
||||
MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
|
||||
MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes)
|
||||
{
|
||||
return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
|
||||
}
|
||||
|
||||
/*
|
||||
* Reserves and returns memory sized on and aligned on sizeof(unsigned).
|
||||
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
|
||||
*/
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
|
||||
assert((bytes & (sizeof(U32)-1)) == 0);
|
||||
return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned);
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
|
||||
{
|
||||
void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES),
|
||||
ZSTD_cwksp_alloc_aligned);
|
||||
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/*
|
||||
* Aligned on sizeof(unsigned). These buffers have the special property that
|
||||
* Aligned on 64 bytes. These buffers have the special property that
|
||||
* their values remain constrained, allowing us to re-use them without
|
||||
* memset()-ing them.
|
||||
*/
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
|
||||
{
|
||||
const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
|
||||
void* alloc = ws->tableEnd;
|
||||
void* end = (BYTE *)alloc + bytes;
|
||||
void* top = ws->allocStart;
|
||||
void* alloc;
|
||||
void* end;
|
||||
void* top;
|
||||
|
||||
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
|
||||
return NULL;
|
||||
}
|
||||
alloc = ws->tableEnd;
|
||||
end = (BYTE *)alloc + bytes;
|
||||
top = ws->allocStart;
|
||||
|
||||
DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
|
||||
alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
|
||||
assert((bytes & (sizeof(U32)-1)) == 0);
|
||||
ZSTD_cwksp_internal_advance_phase(ws, phase);
|
||||
ZSTD_cwksp_assert_internal_consistency(ws);
|
||||
assert(end <= top);
|
||||
if (end > top) {
|
||||
@ -296,27 +386,31 @@ MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
|
||||
ws->tableEnd = end;
|
||||
|
||||
|
||||
assert((bytes & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
|
||||
assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
|
||||
return alloc;
|
||||
}
|
||||
|
||||
/*
|
||||
* Aligned on sizeof(void*).
|
||||
* Note : should happen only once, at workspace first initialization
|
||||
*/
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
|
||||
size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
|
||||
MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes)
|
||||
{
|
||||
size_t const roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
|
||||
void* alloc = ws->objectEnd;
|
||||
void* end = (BYTE*)alloc + roundedBytes;
|
||||
|
||||
|
||||
DEBUGLOG(5,
|
||||
DEBUGLOG(4,
|
||||
"cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
|
||||
alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
|
||||
assert(((size_t)alloc & (sizeof(void*)-1)) == 0);
|
||||
assert((bytes & (sizeof(void*)-1)) == 0);
|
||||
assert((size_t)alloc % ZSTD_ALIGNOF(void*) == 0);
|
||||
assert(bytes % ZSTD_ALIGNOF(void*) == 0);
|
||||
ZSTD_cwksp_assert_internal_consistency(ws);
|
||||
/* we must be in the first phase, no advance is possible */
|
||||
if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
|
||||
DEBUGLOG(4, "cwksp: object alloc failed!");
|
||||
DEBUGLOG(3, "cwksp: object alloc failed!");
|
||||
ws->allocFailed = 1;
|
||||
return NULL;
|
||||
}
|
||||
@ -328,7 +422,8 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
|
||||
return alloc;
|
||||
}
|
||||
|
||||
MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
|
||||
MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws)
|
||||
{
|
||||
DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
|
||||
|
||||
|
||||
@ -451,6 +546,24 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
* Functions Checking Free Space
***************************************/

/* ZSTD_alignmentSpaceWithinBounds() :
* Returns if the estimated space needed for a wksp is within an acceptable limit of the
* actual amount of space used.
*/
MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws,
size_t const estimatedSpace, int resizedWorkspace) {
if (resizedWorkspace) {
/* Resized/newly allocated wksp should have exact bounds */
return ZSTD_cwksp_used(ws) == estimatedSpace;
} else {
/* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes
* than estimatedSpace. See the comments in zstd_cwksp.h for details.
*/
return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63);
}
}

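A minimal sketch of that tolerance check using plain size_t values, with the ZSTD_cwksp_used() call replaced by a parameter:

#include <stdio.h>
#include <stddef.h>

static int space_within_bounds(size_t used, size_t estimated, int resizedWorkspace)
{
    if (resizedWorkspace)
        return used == estimated;                     /* fresh workspace: exact match */
    /* reused workspace: alignment slack can move usage by up to 63 bytes either way */
    return used >= estimated - 63 && used <= estimated + 63;
}

int main(void)
{
    printf("%d\n", space_within_bounds(100000, 100040, 0)); /* 1: inside the slack */
    printf("%d\n", space_within_bounds(100000, 100040, 1)); /* 0: must match exactly */
    return 0;
}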
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
}
@ -48,10 +48,216 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
||||
|
||||
|
||||
FORCE_INLINE_TEMPLATE
|
||||
size_t ZSTD_compressBlock_doubleFast_generic(
|
||||
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize, U32 const mls /* template */)
|
||||
{
|
||||
ZSTD_compressionParameters const* cParams = &ms->cParams;
|
||||
U32* const hashLong = ms->hashTable;
|
||||
const U32 hBitsL = cParams->hashLog;
|
||||
U32* const hashSmall = ms->chainTable;
|
||||
const U32 hBitsS = cParams->chainLog;
|
||||
const BYTE* const base = ms->window.base;
|
||||
const BYTE* const istart = (const BYTE*)src;
|
||||
const BYTE* anchor = istart;
|
||||
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
||||
/* presumes that, if there is a dictionary, it must be using Attach mode */
|
||||
const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
|
||||
const BYTE* const prefixLowest = base + prefixLowestIndex;
|
||||
const BYTE* const iend = istart + srcSize;
|
||||
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
||||
U32 offset_1=rep[0], offset_2=rep[1];
|
||||
U32 offsetSaved = 0;
|
||||
|
||||
size_t mLength;
|
||||
U32 offset;
|
||||
U32 curr;
|
||||
|
||||
/* how many positions to search before increasing step size */
|
||||
const size_t kStepIncr = 1 << kSearchStrength;
|
||||
/* the position at which to increment the step size if no match is found */
|
||||
const BYTE* nextStep;
|
||||
size_t step; /* the current step size */
|
||||
|
||||
size_t hl0; /* the long hash at ip */
|
||||
size_t hl1; /* the long hash at ip1 */
|
||||
|
||||
U32 idxl0; /* the long match index for ip */
|
||||
U32 idxl1; /* the long match index for ip1 */
|
||||
|
||||
const BYTE* matchl0; /* the long match for ip */
|
||||
const BYTE* matchs0; /* the short match for ip */
|
||||
const BYTE* matchl1; /* the long match for ip1 */
|
||||
|
||||
const BYTE* ip = istart; /* the current position */
|
||||
const BYTE* ip1; /* the next position */
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");
|
||||
|
||||
/* init */
|
||||
ip += ((ip - prefixLowest) == 0);
|
||||
{
|
||||
U32 const current = (U32)(ip - base);
|
||||
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
|
||||
U32 const maxRep = current - windowLow;
|
||||
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
|
||||
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
||||
}
|
||||
|
||||
/* Outer Loop: one iteration per match found and stored */
|
||||
while (1) {
|
||||
step = 1;
|
||||
nextStep = ip + kStepIncr;
|
||||
ip1 = ip + step;
|
||||
|
||||
if (ip1 > ilimit) {
|
||||
goto _cleanup;
|
||||
}
|
||||
|
||||
hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
|
||||
idxl0 = hashLong[hl0];
|
||||
matchl0 = base + idxl0;
|
||||
|
||||
/* Inner Loop: one iteration per search / position */
|
||||
do {
|
||||
const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
|
||||
const U32 idxs0 = hashSmall[hs0];
|
||||
curr = (U32)(ip-base);
|
||||
matchs0 = base + idxs0;
|
||||
|
||||
hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */
|
||||
|
||||
/* check noDict repcode */
|
||||
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
|
||||
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
|
||||
ip++;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
|
||||
goto _match_stored;
|
||||
}
|
||||
|
||||
hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);
|
||||
|
||||
if (idxl0 > prefixLowestIndex) {
|
||||
/* check prefix long match */
|
||||
if (MEM_read64(matchl0) == MEM_read64(ip)) {
|
||||
mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
|
||||
offset = (U32)(ip-matchl0);
|
||||
while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
}
|
||||
}
|
||||
|
||||
idxl1 = hashLong[hl1];
|
||||
matchl1 = base + idxl1;
|
||||
|
||||
if (idxs0 > prefixLowestIndex) {
|
||||
/* check prefix short match */
|
||||
if (MEM_read32(matchs0) == MEM_read32(ip)) {
|
||||
goto _search_next_long;
|
||||
}
|
||||
}
|
||||
|
||||
if (ip1 >= nextStep) {
|
||||
PREFETCH_L1(ip1 + 64);
|
||||
PREFETCH_L1(ip1 + 128);
|
||||
step++;
|
||||
nextStep += kStepIncr;
|
||||
}
|
||||
ip = ip1;
|
||||
ip1 += step;
|
||||
|
||||
hl0 = hl1;
|
||||
idxl0 = idxl1;
|
||||
matchl0 = matchl1;
|
||||
#if defined(__aarch64__)
|
||||
PREFETCH_L1(ip+256);
|
||||
#endif
|
||||
} while (ip1 <= ilimit);
|
||||
|
||||
_cleanup:
|
||||
/* save reps for next block */
|
||||
rep[0] = offset_1 ? offset_1 : offsetSaved;
|
||||
rep[1] = offset_2 ? offset_2 : offsetSaved;
|
||||
|
||||
/* Return the last literals size */
|
||||
return (size_t)(iend - anchor);
|
||||
|
||||
_search_next_long:
|
||||
|
||||
/* check prefix long +1 match */
|
||||
if (idxl1 > prefixLowestIndex) {
|
||||
if (MEM_read64(matchl1) == MEM_read64(ip1)) {
|
||||
ip = ip1;
|
||||
mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
|
||||
offset = (U32)(ip-matchl1);
|
||||
while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
}
|
||||
}
|
||||
|
||||
/* if no long +1 match, explore the short match we found */
|
||||
mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
|
||||
offset = (U32)(ip - matchs0);
|
||||
while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */
|
||||
|
||||
/* fall-through */
|
||||
|
||||
_match_found: /* requires ip, offset, mLength */
offset_2 = offset_1;
offset_1 = offset;

if (step < 4) {
/* It is unsafe to write this value back to the hashtable when ip1 is
* greater than or equal to the new ip we will have after we're done
* processing this match. Rather than perform that test directly
* (ip1 >= ip + mLength), which costs speed in practice, we do a simpler
* more predictable test. The minmatch even if we take a short match is
* 4 bytes, so as long as step, the distance between ip and ip1
* (initially) is less than 4, we know ip1 < new ip. */
hashLong[hl1] = (U32)(ip1 - base);
}

ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);

_match_stored:
/* match found */
ip += mLength;
anchor = ip;

if (ip <= ilimit) {
/* Complementary insertion */
/* done after iLimit test, as candidates could be > iend-8 */
{ U32 const indexToInsert = curr+2;
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
}

/* check immediate repcode */
while ( (ip <= ilimit)
&& ( (offset_2>0)
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
/* store sequence */
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, rLength);
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
}
}
}
}

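The search loop above skips ahead with an accelerating stride: each time kStepIncr positions pass without a match, the step grows by one. A standalone sketch of that schedule (the kSearchStrength value is assumed here, purely for illustration):

#include <stdio.h>
#include <stddef.h>

int main(void)
{
    size_t const kSearchStrength = 8;                 /* assumed value, for illustration only */
    size_t const kStepIncr = (size_t)1 << kSearchStrength;
    size_t const srcSize = (size_t)1 << 20;           /* pretend 1 MiB block with no matches */

    size_t ip = 0, step = 1, nextStep = kStepIncr;
    size_t ip1 = ip + step;
    size_t positionsSearched = 0;

    /* mirrors the inner loop: every time ip1 crosses nextStep, the stride grows */
    while (ip1 <= srcSize) {
        positionsSearched++;
        if (ip1 >= nextStep) { step++; nextStep += kStepIncr; }
        ip = ip1;
        ip1 += step;
    }
    printf("searched %zu positions, final step %zu, last position %zu\n",
           positionsSearched, step, ip);
    return 0;
}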
FORCE_INLINE_TEMPLATE
|
||||
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize,
|
||||
U32 const mls /* template */, ZSTD_dictMode_e const dictMode)
|
||||
U32 const mls /* template */)
|
||||
{
|
||||
ZSTD_compressionParameters const* cParams = &ms->cParams;
|
||||
U32* const hashLong = ms->hashTable;
|
||||
@ -72,54 +278,30 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
||||
U32 offsetSaved = 0;
|
||||
|
||||
const ZSTD_matchState_t* const dms = ms->dictMatchState;
|
||||
const ZSTD_compressionParameters* const dictCParams =
|
||||
dictMode == ZSTD_dictMatchState ?
|
||||
&dms->cParams : NULL;
|
||||
const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ?
|
||||
dms->hashTable : NULL;
|
||||
const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ?
|
||||
dms->chainTable : NULL;
|
||||
const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ?
|
||||
dms->window.dictLimit : 0;
|
||||
const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
|
||||
dms->window.base : NULL;
|
||||
const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ?
|
||||
dictBase + dictStartIndex : NULL;
|
||||
const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
|
||||
dms->window.nextSrc : NULL;
|
||||
const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
|
||||
prefixLowestIndex - (U32)(dictEnd - dictBase) :
|
||||
0;
|
||||
const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ?
|
||||
dictCParams->hashLog : hBitsL;
|
||||
const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
|
||||
dictCParams->chainLog : hBitsS;
|
||||
const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
|
||||
const U32* const dictHashLong = dms->hashTable;
|
||||
const U32* const dictHashSmall = dms->chainTable;
|
||||
const U32 dictStartIndex = dms->window.dictLimit;
|
||||
const BYTE* const dictBase = dms->window.base;
|
||||
const BYTE* const dictStart = dictBase + dictStartIndex;
|
||||
const BYTE* const dictEnd = dms->window.nextSrc;
|
||||
const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase);
|
||||
const U32 dictHBitsL = dictCParams->hashLog;
|
||||
const U32 dictHBitsS = dictCParams->chainLog;
|
||||
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
|
||||
|
||||
assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
|
||||
|
||||
/* if a dictionary is attached, it must be within window range */
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
|
||||
}
|
||||
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
|
||||
|
||||
/* init */
|
||||
ip += (dictAndPrefixLength == 0);
|
||||
if (dictMode == ZSTD_noDict) {
|
||||
U32 const curr = (U32)(ip - base);
|
||||
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
|
||||
U32 const maxRep = curr - windowLow;
|
||||
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
|
||||
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
||||
}
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
/* dictMatchState repCode checks don't currently handle repCode == 0
|
||||
* disabling. */
|
||||
assert(offset_1 <= dictAndPrefixLength);
|
||||
assert(offset_2 <= dictAndPrefixLength);
|
||||
}
|
||||
|
||||
/* dictMatchState repCode checks don't currently handle repCode == 0
|
||||
* disabling. */
|
||||
assert(offset_1 <= dictAndPrefixLength);
|
||||
assert(offset_2 <= dictAndPrefixLength);
|
||||
|
||||
/* Main Search Loop */
|
||||
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
|
||||
@ -135,29 +317,18 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
||||
const BYTE* matchLong = base + matchIndexL;
|
||||
const BYTE* match = base + matchIndexS;
|
||||
const U32 repIndex = curr + 1 - offset_1;
|
||||
const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
|
||||
&& repIndex < prefixLowestIndex) ?
|
||||
const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
|
||||
dictBase + (repIndex - dictIndexDelta) :
|
||||
base + repIndex;
|
||||
hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
|
||||
|
||||
/* check dictMatchState repcode */
|
||||
if (dictMode == ZSTD_dictMatchState
|
||||
&& ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
|
||||
/* check repcode */
|
||||
if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
|
||||
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
||||
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
|
||||
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
|
||||
ip++;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
|
||||
goto _match_stored;
|
||||
}
|
||||
|
||||
/* check noDict repcode */
|
||||
if ( dictMode == ZSTD_noDict
|
||||
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
|
||||
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
|
||||
ip++;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
|
||||
goto _match_stored;
|
||||
}
|
||||
|
||||
@ -169,7 +340,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
||||
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
}
|
||||
} else if (dictMode == ZSTD_dictMatchState) {
|
||||
} else {
|
||||
/* check dictMatchState long match */
|
||||
U32 const dictMatchIndexL = dictHashLong[dictHL];
|
||||
const BYTE* dictMatchL = dictBase + dictMatchIndexL;
|
||||
@ -187,7 +358,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
||||
if (MEM_read32(match) == MEM_read32(ip)) {
|
||||
goto _search_next_long;
|
||||
}
|
||||
} else if (dictMode == ZSTD_dictMatchState) {
|
||||
} else {
|
||||
/* check dictMatchState short match */
|
||||
U32 const dictMatchIndexS = dictHashSmall[dictHS];
|
||||
match = dictBase + dictMatchIndexS;
|
||||
@ -220,7 +391,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
||||
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
|
||||
goto _match_found;
|
||||
}
|
||||
} else if (dictMode == ZSTD_dictMatchState) {
|
||||
} else {
|
||||
/* check dict long +1 match */
|
||||
U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
|
||||
const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
|
||||
@ -234,7 +405,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
||||
} } }
|
||||
|
||||
/* if no long +1 match, explore the short match we found */
|
||||
if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
|
||||
if (matchIndexS < prefixLowestIndex) {
|
||||
mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
|
||||
offset = (U32)(curr - matchIndexS);
|
||||
while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
||||
@ -248,7 +419,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
|
||||
|
||||
_match_stored:
|
||||
/* match found */
|
||||
@ -266,43 +437,27 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
||||
}
|
||||
|
||||
/* check immediate repcode */
|
||||
if (dictMode == ZSTD_dictMatchState) {
|
||||
while (ip <= ilimit) {
|
||||
U32 const current2 = (U32)(ip-base);
|
||||
U32 const repIndex2 = current2 - offset_2;
|
||||
const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
|
||||
&& repIndex2 < prefixLowestIndex ?
|
||||
dictBase + repIndex2 - dictIndexDelta :
|
||||
base + repIndex2;
|
||||
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
|
||||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
||||
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
|
||||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
|
||||
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
|
||||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
|
||||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
||||
ip += repLength2;
|
||||
anchor = ip;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
} }
|
||||
|
||||
if (dictMode == ZSTD_noDict) {
|
||||
while ( (ip <= ilimit)
|
||||
&& ( (offset_2>0)
|
||||
& (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
|
||||
/* store sequence */
|
||||
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
|
||||
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
|
||||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
|
||||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
|
||||
ip += rLength;
|
||||
while (ip <= ilimit) {
|
||||
U32 const current2 = (U32)(ip-base);
|
||||
U32 const repIndex2 = current2 - offset_2;
|
||||
const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
|
||||
dictBase + repIndex2 - dictIndexDelta :
|
||||
base + repIndex2;
|
||||
if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
|
||||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
||||
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
|
||||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
|
||||
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
|
||||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
|
||||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
||||
ip += repLength2;
|
||||
anchor = ip;
|
||||
continue; /* faster when present ... (?) */
|
||||
} } }
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
} /* while (ip < ilimit) */
|
||||
|
||||
/* save reps for next block */
|
||||
@ -313,6 +468,24 @@ size_t ZSTD_compressBlock_doubleFast_generic(
|
||||
return (size_t)(iend - anchor);
|
||||
}
|
||||
|
||||
#define ZSTD_GEN_DFAST_FN(dictMode, mls) \
|
||||
static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
|
||||
void const* src, size_t srcSize) \
|
||||
{ \
|
||||
return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
|
||||
}
|
||||
|
||||
ZSTD_GEN_DFAST_FN(noDict, 4)
|
||||
ZSTD_GEN_DFAST_FN(noDict, 5)
|
||||
ZSTD_GEN_DFAST_FN(noDict, 6)
|
||||
ZSTD_GEN_DFAST_FN(noDict, 7)
|
||||
|
||||
ZSTD_GEN_DFAST_FN(dictMatchState, 4)
|
||||
ZSTD_GEN_DFAST_FN(dictMatchState, 5)
|
||||
ZSTD_GEN_DFAST_FN(dictMatchState, 6)
|
||||
ZSTD_GEN_DFAST_FN(dictMatchState, 7)
|
||||
|
||||
|
||||
size_t ZSTD_compressBlock_doubleFast(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
@ -323,13 +496,13 @@ size_t ZSTD_compressBlock_doubleFast(
|
||||
{
|
||||
default: /* includes case 3 */
|
||||
case 4 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
|
||||
return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
|
||||
case 5 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
|
||||
return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
|
||||
case 6 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
|
||||
return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
|
||||
case 7 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
|
||||
return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
|
||||
}
|
||||
}
|
||||
|
||||
@ -343,13 +516,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
|
||||
{
|
||||
default: /* includes case 3 */
|
||||
case 4 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
|
||||
return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
|
||||
case 5 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
|
||||
return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
|
||||
case 6 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
|
||||
return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
|
||||
case 7 :
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
|
||||
return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
|
||||
}
|
||||
}
|
||||
|
||||
@ -385,7 +558,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
||||
|
||||
/* if extDict is invalidated due to maxDistance, switch to "regular" variant */
|
||||
if (prefixStartIndex == dictStartIndex)
|
||||
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
|
||||
return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);
|
||||
|
||||
/* Search Loop */
|
||||
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
|
||||
@ -407,12 +580,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
||||
hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
|
||||
|
||||
if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
|
||||
& (repIndex > dictStartIndex))
|
||||
& (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
|
||||
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
||||
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
||||
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
|
||||
ip++;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
|
||||
} else {
|
||||
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
|
||||
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
|
||||
@ -423,7 +596,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
||||
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
|
||||
|
||||
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
|
||||
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
|
||||
@ -448,7 +621,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
||||
}
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
|
||||
|
||||
} else {
|
||||
ip += ((ip-anchor) >> kSearchStrength) + 1;
|
||||
@ -475,12 +648,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
||||
U32 const repIndex2 = current2 - offset_2;
|
||||
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
|
||||
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
|
||||
& (repIndex2 > dictStartIndex))
|
||||
& (offset_2 <= current2 - dictStartIndex))
|
||||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
||||
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
||||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
||||
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
|
||||
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
|
||||
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
|
||||
ip += repLength2;
|
||||
@ -498,6 +671,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
|
||||
return (size_t)(iend - anchor);
|
||||
}
|
||||
|
||||
ZSTD_GEN_DFAST_FN(extDict, 4)
|
||||
ZSTD_GEN_DFAST_FN(extDict, 5)
|
||||
ZSTD_GEN_DFAST_FN(extDict, 6)
|
||||
ZSTD_GEN_DFAST_FN(extDict, 7)
|
||||
|
||||
size_t ZSTD_compressBlock_doubleFast_extDict(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
@ -508,12 +685,12 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
|
||||
{
|
||||
default: /* includes case 3 */
|
||||
case 4 :
|
||||
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
|
||||
return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
|
||||
case 5 :
|
||||
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
|
||||
return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
|
||||
case 6 :
|
||||
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
|
||||
return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
|
||||
case 7 :
|
||||
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
|
||||
return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
|
||||
}
|
||||
}
|
||||
|
@ -43,145 +43,294 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
|
||||
}
|
||||
|
||||
|
||||
/*
* If you squint hard enough (and ignore repcodes), the search operation at any
* given position is broken into 4 stages:
*
* 1. Hash (map position to hash value via input read)
* 2. Lookup (map hash val to index via hashtable read)
* 3. Load (map index to value at that position via input read)
* 4. Compare
*
* Each of these steps involves a memory read at an address which is computed
* from the previous step. This means these steps must be sequenced and their
* latencies are cumulative.
*
* Rather than do 1->2->3->4 sequentially for a single position before moving
* onto the next, this implementation interleaves these operations across the
* next few positions:
*
* R = Repcode Read & Compare
* H = Hash
* T = Table Lookup
* M = Match Read & Compare
*
* Pos | Time -->
* ----+-------------------
* N   | ... M
* N+1 | ...   TM
* N+2 | R H   T M
* N+3 |   H     TM
* N+4 | R H       T M
* N+5 |   H         ...
* N+6 | R           ...
*
* This is very much analogous to the pipelining of execution in a CPU. And just
* like a CPU, we have to dump the pipeline when we find a match (i.e., take a
* branch).
*
* When this happens, we throw away our current state, and do the following prep
* to re-enter the loop:
*
* Pos | Time -->
* ----+-------------------
* N   | H T
* N+1 |   H
*
* This is also the work we do at the beginning to enter the loop initially.
*/
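[Editor's aside, not part of the diff] The comment above describes a software-pipelining scheme. Below is a minimal, self-contained sketch of the same idea against a toy hash table; it is not zstd's actual code, and the names toy_hash, toy_pipelined_search and TOY_TABLE_BITS are invented for illustration. The point is only that the hash and table lookup for upcoming positions are issued while the candidate for the current position is being compared, so the dependent memory latencies overlap.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define TOY_TABLE_BITS 12
#define TOY_TABLE_SIZE (1u << TOY_TABLE_BITS)

/* Toy 4-byte hash; the real code uses ZSTD_hashPtr(). */
static uint32_t toy_hash(const uint8_t* p)
{
    uint32_t v;
    memcpy(&v, p, sizeof(v));
    return (v * 2654435761u) >> (32 - TOY_TABLE_BITS);
}

/* Returns the position of the first 4-byte match found, or srcSize if none.
 * Each iteration compares the candidate that was looked up for ip0 while the
 * hash and table lookup for the following positions have already been issued,
 * so their latencies overlap (cf. the timing diagram above).
 * The table must be zero-initialized; index 0 doubles as "empty", so in this
 * toy, position 0 is never used as a match source. */
static size_t toy_pipelined_search(const uint8_t* src, size_t srcSize,
                                   uint32_t table[TOY_TABLE_SIZE])
{
    size_t ip0 = 0;
    uint32_t h0, h1, idx0, idx1;
    if (srcSize < 8) return srcSize;

    /* prologue: prime hash + lookup for the first two positions */
    h0 = toy_hash(src + ip0);
    h1 = toy_hash(src + ip0 + 1);
    idx0 = table[h0];
    idx1 = table[h1];

    while (ip0 + 6 < srcSize) {
        /* issue hash + lookup for position ip0+2 before touching ip0's data */
        uint32_t const h2 = toy_hash(src + ip0 + 2);
        uint32_t const idx2 = table[h2];

        table[h0] = (uint32_t)ip0;  /* record the current position */

        /* load + compare for ip0; its lookup was issued one iteration ago */
        if (idx0 != 0 && memcmp(src + idx0, src + ip0, 4) == 0)
            return ip0;             /* a hit "dumps the pipeline" */

        /* slide the pipeline forward by one position */
        h0 = h1; idx0 = idx1;
        h1 = h2; idx1 = idx2;
        ip0 += 1;
    }
    return srcSize;
}

Calling this with a zero-initialized uint32_t table[TOY_TABLE_SIZE] and a buffer of at least 8 bytes returns the first position whose 4-byte prefix repeats an earlier position that hashed to the same slot.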
|
||||
FORCE_INLINE_TEMPLATE size_t
|
||||
ZSTD_compressBlock_fast_generic(
|
||||
ZSTD_compressBlock_fast_noDict_generic(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize,
|
||||
U32 const mls)
|
||||
U32 const mls, U32 const hasStep)
|
||||
{
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
U32* const hashTable = ms->hashTable;
|
||||
U32 const hlog = cParams->hashLog;
|
||||
/* support stepSize of 0 */
|
||||
size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
|
||||
size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2;
|
||||
const BYTE* const base = ms->window.base;
|
||||
const BYTE* const istart = (const BYTE*)src;
|
||||
/* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */
|
||||
const BYTE* ip0 = istart;
|
||||
const BYTE* ip1;
|
||||
const BYTE* anchor = istart;
|
||||
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
|
||||
const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
|
||||
const BYTE* const prefixStart = base + prefixStartIndex;
|
||||
const BYTE* const iend = istart + srcSize;
|
||||
const BYTE* const ilimit = iend - HASH_READ_SIZE;
|
||||
U32 offset_1=rep[0], offset_2=rep[1];
|
||||
|
||||
const BYTE* anchor = istart;
|
||||
const BYTE* ip0 = istart;
|
||||
const BYTE* ip1;
|
||||
const BYTE* ip2;
|
||||
const BYTE* ip3;
|
||||
U32 current0;
|
||||
|
||||
U32 rep_offset1 = rep[0];
|
||||
U32 rep_offset2 = rep[1];
|
||||
U32 offsetSaved = 0;
|
||||
|
||||
/* init */
|
||||
size_t hash0; /* hash for ip0 */
|
||||
size_t hash1; /* hash for ip1 */
|
||||
U32 idx; /* match idx for ip0 */
|
||||
U32 mval; /* src value at match idx */
|
||||
|
||||
U32 offcode;
|
||||
const BYTE* match0;
|
||||
size_t mLength;
|
||||
|
||||
/* ip0 and ip1 are always adjacent. The targetLength skipping and
|
||||
* uncompressibility acceleration is applied to every other position,
|
||||
* matching the behavior of #1562. step therefore represents the gap
|
||||
* between pairs of positions, from ip0 to ip2 or ip1 to ip3. */
|
||||
size_t step;
|
||||
const BYTE* nextStep;
|
||||
const size_t kStepIncr = (1 << (kSearchStrength - 1));
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
|
||||
ip0 += (ip0 == prefixStart);
|
||||
ip1 = ip0 + 1;
|
||||
{ U32 const curr = (U32)(ip0 - base);
|
||||
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
|
||||
U32 const maxRep = curr - windowLow;
|
||||
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
|
||||
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
|
||||
if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0;
|
||||
if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0;
|
||||
}
|
||||
|
||||
/* Main Search Loop */
|
||||
#ifdef __INTEL_COMPILER
|
||||
/* From intel 'The vector pragma indicates that the loop should be
|
||||
* vectorized if it is legal to do so'. Can be used together with
|
||||
* #pragma ivdep (but have opted to exclude that because intel
|
||||
* warns against using it).*/
|
||||
#pragma vector always
|
||||
#endif
|
||||
while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */
|
||||
size_t mLength;
|
||||
BYTE const* ip2 = ip0 + 2;
|
||||
size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls);
|
||||
U32 const val0 = MEM_read32(ip0);
|
||||
size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls);
|
||||
U32 const val1 = MEM_read32(ip1);
|
||||
U32 const current0 = (U32)(ip0-base);
|
||||
U32 const current1 = (U32)(ip1-base);
|
||||
U32 const matchIndex0 = hashTable[h0];
|
||||
U32 const matchIndex1 = hashTable[h1];
|
||||
BYTE const* repMatch = ip2 - offset_1;
|
||||
const BYTE* match0 = base + matchIndex0;
|
||||
const BYTE* match1 = base + matchIndex1;
|
||||
U32 offcode;
|
||||
/* start each op */
|
||||
_start: /* Requires: ip0 */
|
||||
|
||||
#if defined(__aarch64__)
|
||||
PREFETCH_L1(ip0+256);
|
||||
#endif
|
||||
step = stepSize;
|
||||
nextStep = ip0 + kStepIncr;
|
||||
|
||||
hashTable[h0] = current0; /* update hash table */
|
||||
hashTable[h1] = current1; /* update hash table */
|
||||
/* calculate positions, ip0 - anchor == 0, so we skip step calc */
|
||||
ip1 = ip0 + 1;
|
||||
ip2 = ip0 + step;
|
||||
ip3 = ip2 + 1;
|
||||
|
||||
assert(ip0 + 1 == ip1);
|
||||
if (ip3 >= ilimit) {
|
||||
goto _cleanup;
|
||||
}
|
||||
|
||||
if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
|
||||
mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0;
|
||||
ip0 = ip2 - mLength;
|
||||
match0 = repMatch - mLength;
|
||||
hash0 = ZSTD_hashPtr(ip0, hlog, mls);
|
||||
hash1 = ZSTD_hashPtr(ip1, hlog, mls);
|
||||
|
||||
idx = hashTable[hash0];
|
||||
|
||||
do {
|
||||
/* load repcode match for ip[2]*/
|
||||
const U32 rval = MEM_read32(ip2 - rep_offset1);
|
||||
|
||||
/* write back hash table entry */
|
||||
current0 = (U32)(ip0 - base);
|
||||
hashTable[hash0] = current0;
|
||||
|
||||
/* check repcode at ip[2] */
|
||||
if ((MEM_read32(ip2) == rval) & (rep_offset1 > 0)) {
|
||||
ip0 = ip2;
|
||||
match0 = ip0 - rep_offset1;
|
||||
mLength = ip0[-1] == match0[-1];
|
||||
ip0 -= mLength;
|
||||
match0 -= mLength;
|
||||
offcode = STORE_REPCODE_1;
|
||||
mLength += 4;
|
||||
offcode = 0;
|
||||
goto _match;
|
||||
}
|
||||
if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) {
|
||||
/* found a regular match */
|
||||
|
||||
/* load match for ip[0] */
|
||||
if (idx >= prefixStartIndex) {
|
||||
mval = MEM_read32(base + idx);
|
||||
} else {
|
||||
mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
|
||||
}
|
||||
|
||||
/* check match at ip[0] */
|
||||
if (MEM_read32(ip0) == mval) {
|
||||
/* found a match! */
|
||||
goto _offset;
|
||||
}
|
||||
if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) {
|
||||
/* found a regular match after one literal */
|
||||
ip0 = ip1;
|
||||
match0 = match1;
|
||||
|
||||
/* lookup ip[1] */
|
||||
idx = hashTable[hash1];
|
||||
|
||||
/* hash ip[2] */
|
||||
hash0 = hash1;
|
||||
hash1 = ZSTD_hashPtr(ip2, hlog, mls);
|
||||
|
||||
/* advance to next positions */
|
||||
ip0 = ip1;
|
||||
ip1 = ip2;
|
||||
ip2 = ip3;
|
||||
|
||||
/* write back hash table entry */
|
||||
current0 = (U32)(ip0 - base);
|
||||
hashTable[hash0] = current0;
|
||||
|
||||
/* load match for ip[0] */
|
||||
if (idx >= prefixStartIndex) {
|
||||
mval = MEM_read32(base + idx);
|
||||
} else {
|
||||
mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
|
||||
}
|
||||
|
||||
/* check match at ip[0] */
|
||||
if (MEM_read32(ip0) == mval) {
|
||||
/* found a match! */
|
||||
goto _offset;
|
||||
}
|
||||
{ size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
|
||||
assert(step >= 2);
|
||||
ip0 += step;
|
||||
ip1 += step;
|
||||
continue;
|
||||
|
||||
/* lookup ip[1] */
|
||||
idx = hashTable[hash1];
|
||||
|
||||
/* hash ip[2] */
|
||||
hash0 = hash1;
|
||||
hash1 = ZSTD_hashPtr(ip2, hlog, mls);
|
||||
|
||||
/* advance to next positions */
|
||||
ip0 = ip1;
|
||||
ip1 = ip2;
|
||||
ip2 = ip0 + step;
|
||||
ip3 = ip1 + step;
|
||||
|
||||
/* calculate step */
|
||||
if (ip2 >= nextStep) {
|
||||
step++;
|
||||
PREFETCH_L1(ip1 + 64);
|
||||
PREFETCH_L1(ip1 + 128);
|
||||
nextStep += kStepIncr;
|
||||
}
|
||||
_offset: /* Requires: ip0, match0 */
|
||||
/* Compute the offset code */
|
||||
offset_2 = offset_1;
|
||||
offset_1 = (U32)(ip0-match0);
|
||||
offcode = offset_1 + ZSTD_REP_MOVE;
|
||||
mLength = 4;
|
||||
/* Count the backwards match length */
|
||||
while (((ip0>anchor) & (match0>prefixStart))
|
||||
&& (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
|
||||
} while (ip3 < ilimit);
|
||||
|
||||
_match: /* Requires: ip0, match0, offcode */
|
||||
/* Count the forward length */
|
||||
mLength += ZSTD_count(ip0+mLength, match0+mLength, iend);
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
|
||||
/* match found */
|
||||
ip0 += mLength;
|
||||
anchor = ip0;
|
||||
|
||||
if (ip0 <= ilimit) {
|
||||
/* Fill Table */
|
||||
assert(base+current0+2 > istart); /* check base overflow */
|
||||
hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
|
||||
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
|
||||
|
||||
if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */
|
||||
while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
|
||||
/* store sequence */
|
||||
size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
|
||||
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
|
||||
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
|
||||
ip0 += rLength;
|
||||
ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
|
||||
anchor = ip0;
|
||||
continue; /* faster when present (confirmed on gcc-8) ... (?) */
|
||||
} } }
|
||||
ip1 = ip0 + 1;
|
||||
}
|
||||
_cleanup:
|
||||
/* Note that there are probably still a couple positions we could search.
|
||||
* However, it seems to be a meaningful performance hit to try to search
|
||||
* them. So let's not. */
|
||||
|
||||
/* save reps for next block */
|
||||
rep[0] = offset_1 ? offset_1 : offsetSaved;
|
||||
rep[1] = offset_2 ? offset_2 : offsetSaved;
|
||||
rep[0] = rep_offset1 ? rep_offset1 : offsetSaved;
|
||||
rep[1] = rep_offset2 ? rep_offset2 : offsetSaved;
|
||||
|
||||
/* Return the last literals size */
|
||||
return (size_t)(iend - anchor);
|
||||
|
||||
_offset: /* Requires: ip0, idx */
|
||||
|
||||
/* Compute the offset code. */
|
||||
match0 = base + idx;
|
||||
rep_offset2 = rep_offset1;
|
||||
rep_offset1 = (U32)(ip0-match0);
|
||||
offcode = STORE_OFFSET(rep_offset1);
|
||||
mLength = 4;
|
||||
|
||||
/* Count the backwards match length. */
|
||||
while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) {
|
||||
ip0--;
|
||||
match0--;
|
||||
mLength++;
|
||||
}
|
||||
|
||||
_match: /* Requires: ip0, match0, offcode */
|
||||
|
||||
/* Count the forward length. */
|
||||
mLength += ZSTD_count(ip0 + mLength, match0 + mLength, iend);
|
||||
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength);
|
||||
|
||||
ip0 += mLength;
|
||||
anchor = ip0;
|
||||
|
||||
/* write next hash table entry */
|
||||
if (ip1 < ip0) {
|
||||
hashTable[hash1] = (U32)(ip1 - base);
|
||||
}
|
||||
|
||||
/* Fill table and check for immediate repcode. */
|
||||
if (ip0 <= ilimit) {
|
||||
/* Fill Table */
|
||||
assert(base+current0+2 > istart); /* check base overflow */
|
||||
hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
|
||||
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
|
||||
|
||||
if (rep_offset2 > 0) { /* rep_offset2==0 means rep_offset2 is invalidated */
|
||||
while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - rep_offset2)) ) {
|
||||
/* store sequence */
|
||||
size_t const rLength = ZSTD_count(ip0+4, ip0+4-rep_offset2, iend) + 4;
|
||||
{ U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */
|
||||
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
|
||||
ip0 += rLength;
|
||||
ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength);
|
||||
anchor = ip0;
|
||||
continue; /* faster when present (confirmed on gcc-8) ... (?) */
|
||||
} } }
|
||||
|
||||
goto _start;
|
||||
}
|
||||
|
||||
#define ZSTD_GEN_FAST_FN(dictMode, mls, step) \
|
||||
static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step( \
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
|
||||
void const* src, size_t srcSize) \
|
||||
{ \
|
||||
return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \
|
||||
}
|
||||
|
||||
ZSTD_GEN_FAST_FN(noDict, 4, 1)
|
||||
ZSTD_GEN_FAST_FN(noDict, 5, 1)
|
||||
ZSTD_GEN_FAST_FN(noDict, 6, 1)
|
||||
ZSTD_GEN_FAST_FN(noDict, 7, 1)
|
||||
|
||||
ZSTD_GEN_FAST_FN(noDict, 4, 0)
|
||||
ZSTD_GEN_FAST_FN(noDict, 5, 0)
|
||||
ZSTD_GEN_FAST_FN(noDict, 6, 0)
|
||||
ZSTD_GEN_FAST_FN(noDict, 7, 0)
|
||||
|
||||
size_t ZSTD_compressBlock_fast(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
@ -189,24 +338,40 @@ size_t ZSTD_compressBlock_fast(
|
||||
{
|
||||
U32 const mls = ms->cParams.minMatch;
|
||||
assert(ms->dictMatchState == NULL);
|
||||
switch(mls)
|
||||
{
|
||||
default: /* includes case 3 */
|
||||
case 4 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4);
|
||||
case 5 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5);
|
||||
case 6 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6);
|
||||
case 7 :
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7);
|
||||
if (ms->cParams.targetLength > 1) {
|
||||
switch(mls)
|
||||
{
|
||||
default: /* includes case 3 */
|
||||
case 4 :
|
||||
return ZSTD_compressBlock_fast_noDict_4_1(ms, seqStore, rep, src, srcSize);
|
||||
case 5 :
|
||||
return ZSTD_compressBlock_fast_noDict_5_1(ms, seqStore, rep, src, srcSize);
|
||||
case 6 :
|
||||
return ZSTD_compressBlock_fast_noDict_6_1(ms, seqStore, rep, src, srcSize);
|
||||
case 7 :
|
||||
return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize);
|
||||
}
|
||||
} else {
|
||||
switch(mls)
|
||||
{
|
||||
default: /* includes case 3 */
|
||||
case 4 :
|
||||
return ZSTD_compressBlock_fast_noDict_4_0(ms, seqStore, rep, src, srcSize);
|
||||
case 5 :
|
||||
return ZSTD_compressBlock_fast_noDict_5_0(ms, seqStore, rep, src, srcSize);
|
||||
case 6 :
|
||||
return ZSTD_compressBlock_fast_noDict_6_0(ms, seqStore, rep, src, srcSize);
|
||||
case 7 :
|
||||
return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE
|
||||
size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize, U32 const mls)
|
||||
void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
|
||||
{
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
U32* const hashTable = ms->hashTable;
|
||||
@ -242,6 +407,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
||||
assert(endIndex - prefixStartIndex <= maxDistance);
|
||||
(void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
|
||||
|
||||
(void)hasStep; /* not currently specialized on whether it's accelerated */
|
||||
|
||||
/* ensure there will be no underflow
|
||||
* when translating a dict index into a local index */
|
||||
assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
|
||||
@ -272,7 +439,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
||||
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
||||
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
|
||||
ip++;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
|
||||
} else if ( (matchIndex <= prefixStartIndex) ) {
|
||||
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
|
||||
U32 const dictMatchIndex = dictHashTable[dictHash];
|
||||
@ -292,7 +459,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
||||
} /* catch up */
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
|
||||
}
|
||||
} else if (MEM_read32(match) != MEM_read32(ip)) {
|
||||
/* it's not a match, and we're not going to check the dictionary */
|
||||
@ -307,7 +474,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
||||
&& (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
||||
offset_2 = offset_1;
|
||||
offset_1 = offset;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
|
||||
}
|
||||
|
||||
/* match found */
|
||||
@ -332,7 +499,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
||||
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
||||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
||||
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
|
||||
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
|
||||
ip += repLength2;
|
||||
anchor = ip;
|
||||
@ -351,6 +518,12 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
||||
return (size_t)(iend - anchor);
|
||||
}
|
||||
|
||||
|
||||
ZSTD_GEN_FAST_FN(dictMatchState, 4, 0)
|
||||
ZSTD_GEN_FAST_FN(dictMatchState, 5, 0)
|
||||
ZSTD_GEN_FAST_FN(dictMatchState, 6, 0)
|
||||
ZSTD_GEN_FAST_FN(dictMatchState, 7, 0)
|
||||
|
||||
size_t ZSTD_compressBlock_fast_dictMatchState(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize)
|
||||
@ -361,20 +534,20 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
|
||||
{
|
||||
default: /* includes case 3 */
|
||||
case 4 :
|
||||
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
|
||||
return ZSTD_compressBlock_fast_dictMatchState_4_0(ms, seqStore, rep, src, srcSize);
|
||||
case 5 :
|
||||
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
|
||||
return ZSTD_compressBlock_fast_dictMatchState_5_0(ms, seqStore, rep, src, srcSize);
|
||||
case 6 :
|
||||
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
|
||||
return ZSTD_compressBlock_fast_dictMatchState_6_0(ms, seqStore, rep, src, srcSize);
|
||||
case 7 :
|
||||
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
|
||||
return ZSTD_compressBlock_fast_dictMatchState_7_0(ms, seqStore, rep, src, srcSize);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static size_t ZSTD_compressBlock_fast_extDict_generic(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize, U32 const mls)
|
||||
void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
|
||||
{
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
U32* const hashTable = ms->hashTable;
|
||||
@ -398,11 +571,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
||||
const BYTE* const ilimit = iend - 8;
|
||||
U32 offset_1=rep[0], offset_2=rep[1];
|
||||
|
||||
(void)hasStep; /* not currently specialized on whether it's accelerated */
|
||||
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
|
||||
|
||||
/* switch to "regular" variant if extDict is invalidated due to maxDistance */
|
||||
if (prefixStartIndex == dictStartIndex)
|
||||
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
|
||||
return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);
|
||||
|
||||
/* Search Loop */
|
||||
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
|
||||
@ -416,14 +591,14 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
||||
const BYTE* const repMatch = repBase + repIndex;
|
||||
hashTable[h] = curr; /* update hash table */
|
||||
DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
|
||||
assert(offset_1 <= curr +1); /* check repIndex */
|
||||
|
||||
if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
|
||||
if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
|
||||
& (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
|
||||
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
|
||||
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
|
||||
size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
|
||||
ip++;
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength);
|
||||
ip += rLength;
|
||||
anchor = ip;
|
||||
} else {
|
||||
@ -439,7 +614,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
||||
size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
|
||||
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
|
||||
offset_2 = offset_1; offset_1 = offset; /* update offset history */
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
|
||||
ip += mLength;
|
||||
anchor = ip;
|
||||
} }
|
||||
@ -453,12 +628,12 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
||||
U32 const current2 = (U32)(ip-base);
|
||||
U32 const repIndex2 = current2 - offset_2;
|
||||
const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
|
||||
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */
|
||||
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex)) /* intentional overflow */
|
||||
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
|
||||
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
|
||||
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
|
||||
{ U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
|
||||
ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
|
||||
ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2);
|
||||
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
|
||||
ip += repLength2;
|
||||
anchor = ip;
|
||||
@ -475,6 +650,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
|
||||
return (size_t)(iend - anchor);
|
||||
}
|
||||
|
||||
ZSTD_GEN_FAST_FN(extDict, 4, 0)
|
||||
ZSTD_GEN_FAST_FN(extDict, 5, 0)
|
||||
ZSTD_GEN_FAST_FN(extDict, 6, 0)
|
||||
ZSTD_GEN_FAST_FN(extDict, 7, 0)
|
||||
|
||||
size_t ZSTD_compressBlock_fast_extDict(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
@ -485,12 +664,12 @@ size_t ZSTD_compressBlock_fast_extDict(
|
||||
{
|
||||
default: /* includes case 3 */
|
||||
case 4 :
|
||||
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
|
||||
return ZSTD_compressBlock_fast_extDict_4_0(ms, seqStore, rep, src, srcSize);
|
||||
case 5 :
|
||||
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
|
||||
return ZSTD_compressBlock_fast_extDict_5_0(ms, seqStore, rep, src, srcSize);
|
||||
case 6 :
|
||||
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
|
||||
return ZSTD_compressBlock_fast_extDict_6_0(ms, seqStore, rep, src, srcSize);
|
||||
case 7 :
|
||||
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
|
||||
return ZSTD_compressBlock_fast_extDict_7_0(ms, seqStore, rep, src, srcSize);
|
||||
}
|
||||
}
|
||||
|
File diff suppressed because it is too large
@ -23,6 +23,7 @@
|
||||
#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
|
||||
|
||||
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
|
||||
void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
|
||||
|
||||
void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
|
||||
|
||||
@ -40,6 +41,15 @@ size_t ZSTD_compressBlock_lazy(
|
||||
size_t ZSTD_compressBlock_greedy(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy2_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_greedy_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
size_t ZSTD_compressBlock_btlazy2_dictMatchState(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
@ -53,6 +63,15 @@ size_t ZSTD_compressBlock_lazy_dictMatchState(
|
||||
size_t ZSTD_compressBlock_greedy_dictMatchState(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy_dictMatchState_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_greedy_dictMatchState_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
@ -63,6 +82,15 @@ size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
|
||||
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
size_t ZSTD_compressBlock_greedy_extDict(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
@ -73,9 +101,19 @@ size_t ZSTD_compressBlock_lazy_extDict(
|
||||
size_t ZSTD_compressBlock_lazy2_extDict(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_greedy_extDict_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy_extDict_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_lazy2_extDict_row(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_btlazy2_extDict(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
|
||||
|
||||
#endif /* ZSTD_LAZY_H */
|
||||
|
@ -57,6 +57,33 @@ static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const*
|
||||
}
|
||||
}
|
||||
|
||||
/* ZSTD_ldm_gear_reset()
 * Feeds [data, data + minMatchLength) into the hash without registering any
 * splits. This effectively resets the hash state. This is used when skipping
 * over data, either at the beginning of a block, or skipping sections.
 */
|
||||
static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state,
|
||||
BYTE const* data, size_t minMatchLength)
|
||||
{
|
||||
U64 hash = state->rolling;
|
||||
size_t n = 0;
|
||||
|
||||
#define GEAR_ITER_ONCE() do { \
|
||||
hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
|
||||
n += 1; \
|
||||
} while (0)
|
||||
while (n + 3 < minMatchLength) {
|
||||
GEAR_ITER_ONCE();
|
||||
GEAR_ITER_ONCE();
|
||||
GEAR_ITER_ONCE();
|
||||
GEAR_ITER_ONCE();
|
||||
}
|
||||
while (n < minMatchLength) {
|
||||
GEAR_ITER_ONCE();
|
||||
}
|
||||
#undef GEAR_ITER_ONCE
|
||||
}
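[Editor's aside, not part of the diff] For readers unfamiliar with gear hashing, here is a small self-contained toy of the rolling hash being reset above. The update rule mirrors GEAR_ITER_ONCE(); the split criterion (all low bits of the hash set against a stop mask) is an illustrative assumption rather than a statement of zstd's exact test, and toy_gear_tab, toy_gear_update and toy_feed are invented names.

#include <stddef.h>
#include <stdint.h>

/* Toy stand-in for ZSTD_ldm_gearTab: any fixed table of random-looking
 * 64-bit constants serves the illustration. */
static uint64_t toy_gear_tab[256];

static void toy_gear_tab_init(void)
{
    uint64_t x = 0x9E3779B97F4A7C15ull;              /* arbitrary seed */
    int i;
    for (i = 0; i < 256; i++) {
        x ^= x << 13; x ^= x >> 7; x ^= x << 17;     /* xorshift64 */
        toy_gear_tab[i] = x;
    }
}

/* Same update rule as GEAR_ITER_ONCE(): each update shifts the whole state
 * left by one bit, so a byte stops influencing the 64-bit hash after 64
 * further updates -- that is what makes the hash "rolling". */
static uint64_t toy_gear_update(uint64_t hash, uint8_t byte)
{
    return (hash << 1) + toy_gear_tab[byte];
}

/* Feed a buffer and count split candidates, assuming (illustratively) that a
 * split is declared when the low `stopBits` bits of the hash are all set,
 * which yields roughly one split per 2^stopBits input bytes. */
static size_t toy_feed(const uint8_t* data, size_t size, unsigned stopBits)
{
    uint64_t const stopMask = ((uint64_t)1 << stopBits) - 1;
    uint64_t hash = 0;
    size_t splits = 0;
    size_t n;
    for (n = 0; n < size; n++) {
        hash = toy_gear_update(hash, data[n]);
        if ((hash & stopMask) == stopMask) splits++;
    }
    return splits;
}

In these terms, a "reset" is simply feeding minMatchLength bytes through toy_gear_update without testing the stop mask, which is exactly what ZSTD_ldm_gear_reset() does above.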
|
||||
|
||||
/* ZSTD_ldm_gear_feed():
|
||||
*
|
||||
* Registers in the splits array all the split points found in the first
|
||||
@ -132,12 +159,12 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
|
||||
size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
|
||||
size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
|
||||
+ ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
|
||||
return params.enableLdm ? totalSize : 0;
|
||||
return params.enableLdm == ZSTD_ps_enable ? totalSize : 0;
|
||||
}
|
||||
|
||||
size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
|
||||
{
|
||||
return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
|
||||
return params.enableLdm == ZSTD_ps_enable ? (maxChunkSize / params.minMatchLength) : 0;
|
||||
}
|
||||
|
||||
/* ZSTD_ldm_getBucket() :
|
||||
@ -255,7 +282,7 @@ void ZSTD_ldm_fillHashTable(
|
||||
while (ip < iend) {
|
||||
size_t hashed;
|
||||
unsigned n;
|
||||
|
||||
|
||||
numSplits = 0;
|
||||
hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
|
||||
|
||||
@ -327,16 +354,8 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
||||
|
||||
/* Initialize the rolling hash state with the first minMatchLength bytes */
|
||||
ZSTD_ldm_gear_init(&hashState, params);
|
||||
{
|
||||
size_t n = 0;
|
||||
|
||||
while (n < minMatchLength) {
|
||||
numSplits = 0;
|
||||
n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n,
|
||||
splits, &numSplits);
|
||||
}
|
||||
ip += minMatchLength;
|
||||
}
|
||||
ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength);
|
||||
ip += minMatchLength;
|
||||
|
||||
while (ip < ilimit) {
|
||||
size_t hashed;
|
||||
@ -361,6 +380,7 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
||||
for (n = 0; n < numSplits; n++) {
|
||||
size_t forwardMatchLength = 0, backwardMatchLength = 0,
|
||||
bestMatchLength = 0, mLength;
|
||||
U32 offset;
|
||||
BYTE const* const split = candidates[n].split;
|
||||
U32 const checksum = candidates[n].checksum;
|
||||
U32 const hash = candidates[n].hash;
|
||||
@ -428,9 +448,9 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
||||
}
|
||||
|
||||
/* Match found */
|
||||
offset = (U32)(split - base) - bestEntry->offset;
|
||||
mLength = forwardMatchLength + backwardMatchLength;
|
||||
{
|
||||
U32 const offset = (U32)(split - base) - bestEntry->offset;
|
||||
rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
|
||||
|
||||
/* Out of sequence storage */
|
||||
@ -447,6 +467,21 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
||||
ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
|
||||
|
||||
anchor = split + forwardMatchLength;
|
||||
|
||||
/* If we find a match that ends after the data that we've hashed
 * then we have a repeating, overlapping, pattern. E.g. all zeros.
 * If one repetition of the pattern matches our `stopMask` then all
 * repetitions will. We don't need to insert them all into our table,
 * only the first one. So skip over overlapping matches.
 * This is a major speed boost (20x) for compressing a single byte
 * repeated, when that byte ends up in the table.
 */
|
||||
if (anchor > ip + hashed) {
|
||||
ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
|
||||
/* Continue the outer loop at anchor (ip + hashed == anchor). */
|
||||
ip = anchor - hashed;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ip += hashed;
|
||||
@ -500,7 +535,7 @@ size_t ZSTD_ldm_generateSequences(
|
||||
|
||||
assert(chunkStart < iend);
|
||||
/* 1. Perform overflow correction if necessary. */
|
||||
if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
|
||||
if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) {
|
||||
U32 const ldmHSize = 1U << params->hashLog;
|
||||
U32 const correction = ZSTD_window_correctOverflow(
|
||||
&ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
|
||||
@ -544,7 +579,9 @@ size_t ZSTD_ldm_generateSequences(
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
|
||||
void
|
||||
ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch)
|
||||
{
|
||||
while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
|
||||
rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
|
||||
if (srcSize <= seq->litLength) {
|
||||
@ -622,12 +659,13 @@ void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
|
||||
|
||||
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
ZSTD_paramSwitch_e useRowMatchFinder,
|
||||
void const* src, size_t srcSize)
|
||||
{
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
unsigned const minMatch = cParams->minMatch;
|
||||
ZSTD_blockCompressor const blockCompressor =
|
||||
ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
|
||||
ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms));
|
||||
/* Input bounds */
|
||||
BYTE const* const istart = (BYTE const*)src;
|
||||
BYTE const* const iend = istart + srcSize;
|
||||
@ -673,8 +711,8 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
||||
rep[0] = sequence.offset;
|
||||
/* Store the sequence */
|
||||
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
|
||||
sequence.offset + ZSTD_REP_MOVE,
|
||||
sequence.matchLength - MINMATCH);
|
||||
STORE_OFFSET(sequence.offset),
|
||||
sequence.matchLength);
|
||||
ip += sequence.matchLength;
|
||||
}
|
||||
}
|
||||
|
@ -63,6 +63,7 @@ size_t ZSTD_ldm_generateSequences(
|
||||
*/
|
||||
size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
ZSTD_paramSwitch_e useRowMatchFinder,
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
/*
|
||||
|
@ -11,7 +11,10 @@
|
||||
#ifndef ZSTD_LDM_GEARTAB_H
|
||||
#define ZSTD_LDM_GEARTAB_H
|
||||
|
||||
static U64 ZSTD_ldm_gearTab[256] = {
|
||||
#include "../common/compiler.h" /* UNUSED_ATTR */
|
||||
#include "../common/mem.h" /* U64 */
|
||||
|
||||
static UNUSED_ATTR const U64 ZSTD_ldm_gearTab[256] = {
|
||||
0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc,
|
||||
0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05,
|
||||
0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e,
|
||||
|
@ -8,25 +8,12 @@
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Disable inlining for the optimal parser for the kernel build.
|
||||
* It is unlikely to be used in the kernel, and where it is used
|
||||
* latency shouldn't matter because it is very slow to begin with.
|
||||
* We prefer a ~180KB binary size win over faster optimal parsing.
|
||||
*
|
||||
* TODO(https://github.com/facebook/zstd/issues/2862):
|
||||
* Improve the code size of the optimal parser in general, so we
|
||||
* don't need this hack for the kernel build.
|
||||
*/
|
||||
#define ZSTD_NO_INLINE 1
|
||||
|
||||
#include "zstd_compress_internal.h"
|
||||
#include "hist.h"
|
||||
#include "zstd_opt.h"
|
||||
|
||||
|
||||
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
|
||||
#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
|
||||
#define ZSTD_MAX_PRICE (1<<30)
|
||||
|
||||
#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
|
||||
@ -36,11 +23,11 @@
|
||||
* Price functions for optimal parser
|
||||
***************************************/
|
||||
|
||||
#if 0 /* approximation at bit level */
|
||||
#if 0 /* approximation at bit level (for tests) */
|
||||
# define BITCOST_ACCURACY 0
|
||||
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
|
||||
# define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat))
|
||||
#elif 0 /* fractional bit accuracy */
|
||||
# define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
|
||||
#elif 0 /* fractional bit accuracy (for tests) */
|
||||
# define BITCOST_ACCURACY 8
|
||||
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
|
||||
# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
|
||||
@ -78,7 +65,7 @@ MEM_STATIC double ZSTD_fCost(U32 price)
|
||||
|
||||
static int ZSTD_compressedLiterals(optState_t const* const optPtr)
|
||||
{
|
||||
return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
|
||||
return optPtr->literalCompressionMode != ZSTD_ps_disable;
|
||||
}
|
||||
|
||||
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
|
||||
@ -91,25 +78,46 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
|
||||
}
|
||||
|
||||
|
||||
/* ZSTD_downscaleStat() :
 * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
 * return the resulting sum of elements */
|
||||
static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
|
||||
static U32 sum_u32(const unsigned table[], size_t nbElts)
|
||||
{
|
||||
size_t n;
|
||||
U32 total = 0;
|
||||
for (n=0; n<nbElts; n++) {
|
||||
total += table[n];
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
|
||||
{
|
||||
U32 s, sum=0;
|
||||
DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
|
||||
assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
|
||||
DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
|
||||
assert(shift < 30);
|
||||
for (s=0; s<lastEltIndex+1; s++) {
|
||||
table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
|
||||
table[s] = 1 + (table[s] >> shift);
|
||||
sum += table[s];
|
||||
}
|
||||
return sum;
|
||||
}
/* ZSTD_scaleStats() :
* reduce all elements in table if sum is too large
* return the resulting sum of elements */
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
{
U32 const prevsum = sum_u32(table, lastEltIndex+1);
U32 const factor = prevsum >> logTarget;
DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
assert(logTarget < 30);
if (factor <= 1) return prevsum;
return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
}
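To make the new rescaling step concrete, here is a minimal standalone sketch, not taken from the tree: the frequency values and the highbit32() helper are made up for illustration (the real code uses ZSTD_highbit32()), but the shift-and-floor formula mirrors ZSTD_downscaleStats()/ZSTD_scaleStats() above, shrinking a table so its sum lands near 2^logTarget while every entry keeps a floor of 1.

    /* illustrative sketch only: rescale a stats table toward 2^logTarget */
    #include <stdio.h>

    static unsigned highbit32(unsigned v) { unsigned r = 0; while (v >>= 1) r++; return r; }

    int main(void)
    {
        unsigned freq[4] = { 700000, 200000, 90000, 10000 };  /* hypothetical stats */
        unsigned const logTarget = 11;                         /* aim for a sum near 2048 */
        unsigned sum = 0, s;
        for (s = 0; s < 4; s++) sum += freq[s];
        {   unsigned const factor = sum >> logTarget;          /* how far above target */
            unsigned const shift = highbit32(factor);          /* divide by ~factor */
            unsigned newSum = 0;
            for (s = 0; s < 4; s++) {
                freq[s] = 1 + (freq[s] >> shift);              /* same formula as the diff */
                newSum += freq[s];
            }
            printf("old sum=%u shift=%u new sum=%u\n", sum, shift, newSum);
        }
        return 0;
    }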
/* ZSTD_rescaleFreqs() :
* if first block (detected by optPtr->litLengthSum == 0) : init statistics
* take hints from dictionary if there is one
* or init from zero, using src for literals stats, or flat 1 for match symbols
* and init from zero if there is none,
* using src for literals stats, and baseline stats for sequence symbols
* otherwise downscale existing stats, to be used as seed for next block.
*/
static void

@@ -138,7 +146,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
optPtr->litSum = 0;
for (lit=0; lit<=MaxLit; lit++) {
U32 const scaleLog = 11; /* scale to 2K */
U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
assert(bitCost <= scaleLog);
optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
optPtr->litSum += optPtr->litFreq[lit];

@@ -186,14 +194,19 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
if (compressedLiterals) {
unsigned lit = MaxLit;
HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
}

{ unsigned ll;
for (ll=0; ll<=MaxLL; ll++)
optPtr->litLengthFreq[ll] = 1;
{ unsigned const baseLLfreqs[MaxLL+1] = {
4, 2, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1
};
ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
}
optPtr->litLengthSum = MaxLL+1;

{ unsigned ml;
for (ml=0; ml<=MaxML; ml++)

@@ -201,21 +214,26 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
}
optPtr->matchLengthSum = MaxML+1;

{ unsigned of;
for (of=0; of<=MaxOff; of++)
optPtr->offCodeFreq[of] = 1;
{ unsigned const baseOFCfreqs[MaxOff+1] = {
6, 2, 1, 1, 2, 3, 4, 4,
4, 3, 2, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1
};
ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
}
optPtr->offCodeSum = MaxOff+1;

}

} else { /* new block : re-use previous statistics, scaled down */

if (compressedLiterals)
optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
}

ZSTD_setBasePrices(optPtr, optLevel);
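The price functions earlier in this file weight a symbol by roughly log2(sum/freq), expressed in 1/256-bit units (BITCOST_ACCURACY == 8). A minimal sketch of that idea, not from the tree: it uses libm's log2 instead of the integer interpolation ZSTD_fracWeight() really performs, and the frequency values are invented. Build with -lm.

    /* illustrative sketch only: fractional-bit symbol price */
    #include <math.h>
    #include <stdio.h>

    #define BITCOST_ACCURACY   8
    #define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)

    static unsigned approx_price(unsigned freq, unsigned sum)
    {
        return (unsigned)(log2((double)sum / (double)freq) * BITCOST_MULTIPLIER);
    }

    int main(void)
    {
        unsigned const litFreq[4] = { 1000, 500, 250, 250 };  /* hypothetical litFreq[] */
        unsigned const litSum = 2000;
        unsigned i;
        for (i = 0; i < 4; i++)
            printf("symbol %u : price = %u (in 1/256 bits)\n", i, approx_price(litFreq[i], litSum));
        return 0;
    }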
@@ -251,7 +269,16 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
* cost of literalLength symbol */
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
{
if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);
assert(litLength <= ZSTD_BLOCKSIZE_MAX);
if (optPtr->priceType == zop_predef)
return WEIGHT(litLength, optLevel);
/* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
* because it isn't representable in the zstd format. So instead just
* call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block
* would be all literals.
*/
if (litLength == ZSTD_BLOCKSIZE_MAX)
return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);

/* dynamic statistics */
{ U32 const llCode = ZSTD_LLcode(litLength);

@@ -264,15 +291,17 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
/* ZSTD_getMatchPrice() :
* Provides the cost of the match part (offset + matchLength) of a sequence
* Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
* optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
* @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2
* @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
*/
FORCE_INLINE_TEMPLATE U32
ZSTD_getMatchPrice(U32 const offset,
ZSTD_getMatchPrice(U32 const offcode,
U32 const matchLength,
const optState_t* const optPtr,
int const optLevel)
{
U32 price;
U32 const offCode = ZSTD_highbit32(offset+1);
U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode));
U32 const mlBase = matchLength - MINMATCH;
assert(matchLength >= MINMATCH);
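A minimal sketch of the "stored offset" scale the comment above describes (0,1,2 stand for repcodes 1-3, 3+ encode real_offset+2). The helper names and the REP_MOVE value here are local stand-ins written for illustration; the STORE_REPCODE()/STORE_OFFSET()/STORED_TO_OFFBASE() macros in the tree are the authoritative versions.

    /* illustrative sketch only: stored-offset scale used by the optimal parser */
    #include <assert.h>
    #include <stdio.h>

    #define REP_MOVE 2  /* stand-in for ZSTD_REP_MOVE (== ZSTD_REP_NUM - 1) */

    static unsigned store_repcode(unsigned rep)    { assert(rep >= 1 && rep <= 3); return rep - 1; }
    static unsigned store_offset(unsigned offset)  { assert(offset > 0); return offset + REP_MOVE; }
    static unsigned stored_to_offbase(unsigned st) { return st + 1; }

    int main(void)
    {
        /* repcode 1 -> stored 0 ; real offset 100 -> stored 102 ; pricing then
         * takes highbit32(stored_to_offbase(x)), so repcodes stay in the cheapest buckets */
        printf("repcode1 -> %u, offset100 -> %u, offbase(stored 0) -> %u\n",
               store_repcode(1), store_offset(100), stored_to_offbase(0));
        return 0;
    }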
@@ -315,8 +344,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
optPtr->litLengthSum++;
}

/* match offset code (0-2=>repCode; 3+=>offset+2) */
{ U32 const offCode = ZSTD_highbit32(offsetCode+1);
/* offset code : expected to follow storeSeq() numeric representation */
{ U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode));
assert(offCode <= MaxOff);
optPtr->offCodeFreq[offCode]++;
optPtr->offCodeSum++;

@@ -350,7 +379,7 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)

/* Update hashTable3 up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */
static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
U32* nextToUpdate3,
const BYTE* const ip)
{

@@ -376,11 +405,13 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
* Binary Tree search
***************************************/
/* ZSTD_insertBt1() : add one or multiple positions to tree.
* ip : assumed <= iend-8 .
* @param ip assumed <= iend-8 .
* @param target The target of ZSTD_updateTree_internal() - we are filling to this position
* @return : nb of positions added */
static U32 ZSTD_insertBt1(
ZSTD_matchState_t* ms,
const ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iend,
U32 const target,
U32 const mls, const int extDict)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;

@@ -403,7 +434,10 @@ static U32 ZSTD_insertBt1(
U32* smallerPtr = bt + 2*(curr&btMask);
U32* largerPtr = smallerPtr + 1;
U32 dummy32; /* to be nullified at the end */
U32 const windowLow = ms->window.lowLimit;
/* windowLow is based on target because
* we only need positions that will be in the window at the end of the tree update.
*/
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
U32 matchEndIdx = curr+8+1;
size_t bestLength = 8;
U32 nbCompares = 1U << cParams->searchLog;

@@ -416,6 +450,7 @@ static U32 ZSTD_insertBt1(

DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);

assert(curr <= target);
assert(ip <= iend-8); /* required for h calculation */
hashTable[h] = curr; /* Update Hash Table */

@@ -504,7 +539,7 @@ void ZSTD_updateTree_internal(
idx, target, dictMode);

while(idx < target) {
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
assert(idx < (U32)(idx + forward));
idx += forward;
}

@@ -609,7 +644,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
repCode, ll0, repOffset, repLen);
bestLength = repLen;
matches[mnum].off = repCode - ll0;
matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1); /* expect value between 1 and 3 */
matches[mnum].len = (U32)repLen;
mnum++;
if ( (repLen > sufficient_len)

@@ -638,7 +673,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
bestLength = mlen;
assert(curr > matchIndex3);
assert(mnum==0); /* no prior solution */
matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE;
matches[0].off = STORE_OFFSET(curr - matchIndex3);
matches[0].len = (U32)mlen;
mnum = 1;
if ( (mlen > sufficient_len) |

@@ -647,7 +682,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
return 1;
} } }
/* no dictMatchState lookup: dicts don't have a populated HC3 table */
}
} /* if (mls == 3) */

hashTable[h] = curr; /* Update Hash Table */

@@ -672,20 +707,19 @@ U32 ZSTD_insertBtAndGetAllMatches (

if (matchLength > bestLength) {
DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
(U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
(U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
assert(matchEndIdx > matchIndex);
if (matchLength > matchEndIdx - matchIndex)
matchEndIdx = matchIndex + (U32)matchLength;
bestLength = matchLength;
matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
matches[mnum].off = STORE_OFFSET(curr - matchIndex);
matches[mnum].len = (U32)matchLength;
mnum++;
if ( (matchLength > ZSTD_OPT_NUM)
| (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
break; /* drop, to preserve bt consistency (miss a little bit of compression) */
}
}
} }

if (match[matchLength] < ip[matchLength]) {
/* match smaller than current */

@@ -721,18 +755,17 @@ U32 ZSTD_insertBtAndGetAllMatches (
if (matchLength > bestLength) {
matchIndex = dictMatchIndex + dmsIndexDelta;
DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
(U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
(U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
if (matchLength > matchEndIdx - matchIndex)
matchEndIdx = matchIndex + (U32)matchLength;
bestLength = matchLength;
matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
matches[mnum].off = STORE_OFFSET(curr - matchIndex);
matches[mnum].len = (U32)matchLength;
mnum++;
if ( (matchLength > ZSTD_OPT_NUM)
| (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
break; /* drop, to guarantee consistency (miss a little bit of compression) */
}
}
} }

if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
if (match[matchLength] < ip[matchLength]) {

@@ -742,39 +775,91 @@ U32 ZSTD_insertBtAndGetAllMatches (
/* match is larger than current */
commonLengthLarger = matchLength;
dictMatchIndex = nextPtr[0];
}
}
}
} } } /* if (dictMode == ZSTD_dictMatchState) */

assert(matchEndIdx > curr+8);
ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
return mnum;
}
typedef U32 (*ZSTD_getAllMatchesFn)(
ZSTD_match_t*,
ZSTD_matchState_t*,
U32*,
const BYTE*,
const BYTE*,
const U32 rep[ZSTD_REP_NUM],
U32 const ll0,
U32 const lengthToBeat);

FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */
ZSTD_matchState_t* ms,
U32* nextToUpdate3,
const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
const U32 rep[ZSTD_REP_NUM],
U32 const ll0,
U32 const lengthToBeat)
FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
ZSTD_match_t* matches,
ZSTD_matchState_t* ms,
U32* nextToUpdate3,
const BYTE* ip,
const BYTE* const iHighLimit,
const U32 rep[ZSTD_REP_NUM],
U32 const ll0,
U32 const lengthToBeat,
const ZSTD_dictMode_e dictMode,
const U32 mls)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32 const matchLengthSearch = cParams->minMatch;
DEBUGLOG(8, "ZSTD_BtGetAllMatches");
if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
switch(matchLengthSearch)
{
case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
default :
case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
case 7 :
case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
if (ip < ms->window.base + ms->nextToUpdate)
return 0; /* skipped area */
ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
}

#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls

#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
ZSTD_match_t* matches, \
ZSTD_matchState_t* ms, \
U32* nextToUpdate3, \
const BYTE* ip, \
const BYTE* const iHighLimit, \
const U32 rep[ZSTD_REP_NUM], \
U32 const ll0, \
U32 const lengthToBeat) \
{ \
return ZSTD_btGetAllMatches_internal( \
matches, ms, nextToUpdate3, ip, iHighLimit, \
rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
}

#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)

GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)

#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \
{ \
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \
}

static ZSTD_getAllMatchesFn
ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
{
ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
};
U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
assert((U32)dictMode < 3);
assert(mls - 3 < 4);
return getAllMatchesFns[(int)dictMode][mls - 3];
}
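The change above replaces a per-position switch on minMatch with macro-generated specializations that are selected once per block through a small function-pointer table. A minimal sketch of the same pattern, with invented names and a trivial body, just to show the shape of the technique:

    /* illustrative sketch only: macro-generated specializations + selector table */
    #include <stdio.h>

    static int work_internal(int x, int mls) { return x * mls; }  /* stand-in for the templated body */

    #define GEN_WORK(mls) static int work_##mls(int x) { return work_internal(x, mls); }
    GEN_WORK(3) GEN_WORK(4) GEN_WORK(5) GEN_WORK(6)

    typedef int (*work_fn)(int);

    static work_fn select_work(int minMatch)
    {
        static work_fn const table[4] = { work_3, work_4, work_5, work_6 };
        int const mls = minMatch < 3 ? 3 : (minMatch > 6 ? 6 : minMatch);  /* BOUNDED(3, minMatch, 6) */
        return table[mls - 3];
    }

    int main(void) { printf("%d\n", select_work(5)(7)); return 0; }

The payoff is the same as in the diff: the hot inner loop calls an already-specialized function, while the branch on the compression parameters happens only once.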
/* ***********************

@@ -783,16 +868,18 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (

/* Struct containing info needed to make decision about ldm inclusion */
typedef struct {
rawSeqStore_t seqStore; /* External match candidates store for this block */
U32 startPosInBlock; /* Start position of the current match candidate */
U32 endPosInBlock; /* End position of the current match candidate */
U32 offset; /* Offset of the match candidate */
rawSeqStore_t seqStore; /* External match candidates store for this block */
U32 startPosInBlock; /* Start position of the current match candidate */
U32 endPosInBlock; /* End position of the current match candidate */
U32 offset; /* Offset of the match candidate */
} ZSTD_optLdm_t;

/* ZSTD_optLdm_skipRawSeqStoreBytes():
* Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'.
* Moves forward in @rawSeqStore by @nbBytes,
* which will update the fields 'pos' and 'posInSequence'.
*/
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
{
U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
while (currPos && rawSeqStore->pos < rawSeqStore->size) {
rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];

@@ -813,8 +900,10 @@ static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t
* Calculates the beginning and end of the next match in the current block.
* Updates 'pos' and 'posInSequence' of the ldmSeqStore.
*/
static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
U32 blockBytesRemaining) {
static void
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
U32 blockBytesRemaining)
{
rawSeq currSeq;
U32 currBlockEndPos;
U32 literalsBytesRemaining;

@@ -826,8 +915,8 @@ static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 cu
optLdm->endPosInBlock = UINT_MAX;
return;
}
/* Calculate appropriate bytes left in matchLength and litLength after adjusting
based on ldmSeqStore->posInSequence */
/* Calculate appropriate bytes left in matchLength and litLength
* after adjusting based on ldmSeqStore->posInSequence */
currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
currBlockEndPos = currPosInBlock + blockBytesRemaining;

@@ -863,15 +952,16 @@ static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 cu
}
/* ZSTD_optLdm_maybeAddMatch():
* Adds a match if it's long enough, based on its 'matchStartPosInBlock'
* and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches'
* Adds a match if it's long enough,
* based on its 'matchStartPosInBlock' and 'matchEndPosInBlock',
* into 'matches'. Maintains the correct ordering of 'matches'.
*/
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
ZSTD_optLdm_t* optLdm, U32 currPosInBlock) {
U32 posDiff = currPosInBlock - optLdm->startPosInBlock;
const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
{
U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
/* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE;
U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;

/* Ensure that current block position is not outside of the match */
if (currPosInBlock < optLdm->startPosInBlock

@@ -881,6 +971,7 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
}

if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
U32 const candidateOffCode = STORE_OFFSET(optLdm->offset);
DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
candidateOffCode, candidateMatchLength, currPosInBlock);
matches[*nbMatches].len = candidateMatchLength;
@@ -892,8 +983,11 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
/* ZSTD_optLdm_processMatchCandidate():
* Wrapper function to update ldm seq store and call ldm functions as necessary.
*/
static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches,
U32 currPosInBlock, U32 remainingBytes) {
static void
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
ZSTD_match_t* matches, U32* nbMatches,
U32 currPosInBlock, U32 remainingBytes)
{
if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
return;
}

@@ -904,19 +998,19 @@ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_
* at the end of a match from the ldm seq store, and will often be some bytes
* over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
*/
U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
}
}
ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
}
ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
}
/*-*******************************
* Optimal parser
*********************************/

static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
{
return sol.litlen + sol.mlen;

@@ -957,6 +1051,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
const BYTE* const prefixStart = base + ms->window.dictLimit;
const ZSTD_compressionParameters* const cParams = &ms->cParams;

ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);

U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
U32 nextToUpdate3 = ms->nextToUpdate;

@@ -984,7 +1080,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
/* find first match */
{ U32 const litlen = (U32)(ip - anchor);
U32 const ll0 = !litlen;
U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
(U32)(ip-istart), (U32)(iend - ip));
if (!nbMatches) { ip++; continue; }

@@ -998,18 +1094,18 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
* in every price. We include the literal length to avoid negative
* prices when we subtract the previous literal length.
*/
opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);

/* large match -> immediate encoding */
{ U32 const maxML = matches[nbMatches-1].len;
U32 const maxOffset = matches[nbMatches-1].off;
U32 const maxOffcode = matches[nbMatches-1].off;
DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart));

if (maxML > sufficient_len) {
lastSequence.litlen = litlen;
lastSequence.mlen = maxML;
lastSequence.off = maxOffset;
lastSequence.off = maxOffcode;
DEBUGLOG(6, "large match (%u>%u), immediate encoding",
maxML, sufficient_len);
cur = 0;

@@ -1018,24 +1114,25 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
} }

/* set prices for first matches starting position == 0 */
{ U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
assert(opt[0].price >= 0);
{ U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
U32 pos;
U32 matchNb;
for (pos = 1; pos < minMatch; pos++) {
opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
}
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
U32 const offset = matches[matchNb].off;
U32 const offcode = matches[matchNb].off;
U32 const end = matches[matchNb].len;
for ( ; pos <= end ; pos++ ) {
U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel);
U32 const sequencePrice = literalsPrice + matchPrice;
DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
pos, ZSTD_fCost(sequencePrice));
opt[pos].mlen = pos;
opt[pos].off = offset;
opt[pos].off = offcode;
opt[pos].litlen = litlen;
opt[pos].price = sequencePrice;
opt[pos].price = (int)sequencePrice;
} }
last_pos = pos-1;
}

@@ -1050,9 +1147,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
/* Fix current position with one literal if cheaper */
{ U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
int const price = opt[cur-1].price
+ ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
+ ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
- ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
+ (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
+ (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
- (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
assert(price < 1000000000); /* overflow check */
if (price <= opt[cur].price) {
DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",

@@ -1078,7 +1175,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
assert(cur >= opt[cur].mlen);
if (opt[cur].mlen != 0) {
U32 const prev = cur - opt[cur].mlen;
repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
} else {
ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));

@@ -1095,11 +1192,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
}

assert(opt[cur].price >= 0);
{ U32 const ll0 = (opt[cur].mlen != 0);
U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
U32 const previousPrice = opt[cur].price;
U32 const previousPrice = (U32)opt[cur].price;
U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
U32 matchNb;

ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,

@@ -1137,7 +1235,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,

for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
U32 const pos = cur + mlen;
int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);

if ((pos > last_pos) || (price < opt[pos].price)) {
DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",

@@ -1167,7 +1265,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
* update them while traversing the sequences.
*/
if (lastSequence.mlen != 0) {
repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
ZSTD_memcpy(rep, &reps, sizeof(reps));
} else {
ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));

@@ -1211,7 +1309,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,

assert(anchor + llen <= iend);
ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen);
anchor += advance;
ip = anchor;
} }

@@ -1223,38 +1321,30 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
return (size_t)(iend - anchor);
}
static size_t ZSTD_compressBlock_opt0(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
}

static size_t ZSTD_compressBlock_opt2(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
}

size_t ZSTD_compressBlock_btopt(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
DEBUGLOG(5, "ZSTD_compressBlock_btopt");
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}

/* used in 2-pass strategy */
static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
{
U32 s, sum=0;
assert(ZSTD_FREQ_DIV+bonus >= 0);
for (s=0; s<lastEltIndex+1; s++) {
table[s] <<= ZSTD_FREQ_DIV+bonus;
table[s]--;
sum += table[s];
}
return sum;
}

/* used in 2-pass strategy */
MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
{
if (ZSTD_compressedLiterals(optPtr))
optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
}

/* ZSTD_initStats_ultra():
* make a first compression pass, just to seed stats with more accurate starting values.

@@ -1276,7 +1366,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */

ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/

/* invalidate first scan from history */
ZSTD_resetSeqStore(seqStore);

@@ -1285,8 +1375,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
ms->window.lowLimit = ms->window.dictLimit;
ms->nextToUpdate = ms->window.dictLimit;
/* reinforce weight of collected statistics */
ZSTD_upscaleStats(&ms->opt);
}
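The two-pass idea behind ZSTD_initStats_ultra() reduces to: run a throw-away pass only to observe symbol usage, re-weight the collected counts, then let the real pass start from that seed instead of flat defaults. A minimal sketch of that shape, with an invented symbol model that has nothing to do with the tree's actual statistics:

    /* illustrative sketch only: seed statistics with a first pass, then re-weight */
    #include <stdio.h>

    #define NB_SYMBOLS 4

    static void gather_stats(unsigned freq[NB_SYMBOLS], const unsigned char* src, size_t n)
    {
        size_t i;
        for (i = 0; i < n; i++) freq[src[i] % NB_SYMBOLS]++;   /* pass 1: observe usage */
    }

    static void upscale_stats(unsigned freq[NB_SYMBOLS])
    {
        int i;                                                  /* like ZSTD_upscaleStats(): */
        for (i = 0; i < NB_SYMBOLS; i++) freq[i] = freq[i] * 16 + 1;  /* trust the seed more */
    }

    int main(void)
    {
        const unsigned char src[] = "abababababcdcdcdcd";
        unsigned freq[NB_SYMBOLS] = { 0, 0, 0, 0 };
        gather_stats(freq, src, sizeof(src) - 1);
        upscale_stats(freq);
        /* a second, real pass would now price symbols using freq[] as its seed */
        printf("seeded freqs: %u %u %u %u\n", freq[0], freq[1], freq[2], freq[3]);
        return 0;
    }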
size_t ZSTD_compressBlock_btultra(

@@ -1294,7 +1382,7 @@ size_t ZSTD_compressBlock_btultra(
const void* src, size_t srcSize)
{
DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}

size_t ZSTD_compressBlock_btultra2(

@@ -1322,35 +1410,35 @@ size_t ZSTD_compressBlock_btultra2(
ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
}

return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}

size_t ZSTD_compressBlock_btopt_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_btultra_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_btopt_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
}

size_t ZSTD_compressBlock_btultra_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
const void* src, size_t srcSize)
{
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
}

/* note : no btultra2 variant for extDict nor dictMatchState,

(File diff suppressed because it is too large)
@@ -53,7 +53,6 @@
* Dependencies
*********************************************************/
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
#include "../common/cpu.h" /* bmi2 */
#include "../common/mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY
#include "../common/fse.h"

@@ -252,11 +251,11 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
dctx->inBuffSize = 0;
dctx->outBuffSize = 0;
dctx->streamStage = zdss_init;
dctx->legacyContext = NULL;
dctx->previousLegacyVersion = 0;
dctx->noForwardProgress = 0;
dctx->oversizedDuration = 0;
dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
#if DYNAMIC_BMI2
dctx->bmi2 = ZSTD_cpuSupportsBmi2();
#endif
dctx->ddictSet = NULL;
ZSTD_DCtx_resetParameters(dctx);
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION

@@ -277,8 +276,7 @@ ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
return dctx;
}

ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
{
static ZSTD_DCtx* ZSTD_createDCtx_internal(ZSTD_customMem customMem) {
if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;

{ ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem);

@@ -289,10 +287,15 @@ ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
}
}

ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
{
return ZSTD_createDCtx_internal(customMem);
}

ZSTD_DCtx* ZSTD_createDCtx(void)
{
DEBUGLOG(3, "ZSTD_createDCtx");
return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
return ZSTD_createDCtx_internal(ZSTD_defaultCMem);
}

static void ZSTD_clearDict(ZSTD_DCtx* dctx)

@@ -370,6 +373,19 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size)
return 0;
}

/*! ZSTD_isSkippableFrame() :
* Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame.
* Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
*/
unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size)
{
if (size < ZSTD_FRAMEIDSIZE) return 0;
{ U32 const magic = MEM_readLE32(buffer);
if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
}
return 0;
}

/* ZSTD_frameHeaderSize_internal() :
* srcSize must be large enough to reach header size fields.
* note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.

@@ -497,7 +513,6 @@ size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t src
return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
}
/* ZSTD_getFrameContentSize() :
* compatible with legacy mode
* @return : decompressed size of the single frame pointed to by `src` if known, otherwise

@@ -532,6 +547,37 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
}
}
/*! ZSTD_readSkippableFrame() :
* Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer.
*
* The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
* i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested
* in the magicVariant.
*
* Returns an error if destination buffer is not large enough, or if the frame is not skippable.
*
* @return : number of bytes written or a ZSTD error.
*/
ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant,
const void* src, size_t srcSize)
{
U32 const magicNumber = MEM_readLE32(src);
size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;

/* check input validity */
RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");

/* deliver payload */
if (skippableContentSize > 0 && dst != NULL)
ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
if (magicVariant != NULL)
*magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
return skippableContentSize;
}
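For callers, a hedged usage sketch of the skippable-frame helpers shown above, written against the user-space libzstd flavour of the same API (in some releases these declarations sit behind ZSTD_STATIC_LINKING_ONLY, hence the define; the frame buffer itself is assumed to come from elsewhere):

    /* illustrative sketch only: read back the payload of a skippable frame */
    #define ZSTD_STATIC_LINKING_ONLY   /* assumption: helpers may be experimental-only */
    #include <stdio.h>
    #include <zstd.h>

    static void dump_skippable(const void* frame, size_t frameSize)
    {
        unsigned char payload[1024];
        unsigned magicVariant = 0;

        if (!ZSTD_isSkippableFrame(frame, frameSize)) {
            printf("not a skippable frame\n");
            return;
        }
        {   size_t const r = ZSTD_readSkippableFrame(payload, sizeof(payload),
                                                     &magicVariant, frame, frameSize);
            if (ZSTD_isError(r)) { printf("error: %s\n", ZSTD_getErrorName(r)); return; }
            printf("skippable frame: %zu payload bytes, magic variant %u\n", r, magicVariant);
        }
    }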
/* ZSTD_findDecompressedSize() :
* compatible with legacy mode
* `srcSize` must be the exact length of some number of ZSTD compressed and/or

@@ -824,7 +870,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
switch(blockProperties.blockType)
{
case bt_compressed:
decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1);
decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1, not_streaming);
break;
case bt_raw :
decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize);

@@ -976,7 +1022,7 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr
{
#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
size_t regenSize;
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
ZSTD_DCtx* const dctx = ZSTD_createDCtx_internal(ZSTD_defaultCMem);
RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!");
regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
ZSTD_freeDCtx(dctx);

@@ -1010,7 +1056,7 @@ static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t
return dctx->expected;
if (dctx->bType != bt_raw)
return dctx->expected;
return MIN(MAX(inputSize, 1), dctx->expected);
return BOUNDED(1, inputSize, dctx->expected);
}

ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {

@@ -1116,7 +1162,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
{
case bt_compressed:
DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1);
rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming);
dctx->expected = 0; /* Streaming not supported */
break;
case bt_raw :

@@ -1438,7 +1484,7 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
ZSTD_DStream* ZSTD_createDStream(void)
{
DEBUGLOG(3, "ZSTD_createDStream");
return ZSTD_createDStream_advanced(ZSTD_defaultCMem);
return ZSTD_createDCtx_internal(ZSTD_defaultCMem);
}

ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)

@@ -1448,7 +1494,7 @@ ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)

ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem)
{
return ZSTD_createDCtx_advanced(customMem);
return ZSTD_createDCtx_internal(customMem);
}

size_t ZSTD_freeDStream(ZSTD_DStream* zds)

@@ -1708,7 +1754,8 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
{
size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2);
/* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy*/
unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2);
unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
size_t const minRBSize = (size_t) neededSize;
RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,

@@ -1842,7 +1889,6 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
DEBUGLOG(5, "stage zdss_init => transparent reset ");
zds->streamStage = zdss_loadHeader;
zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
zds->legacyVersion = 0;
zds->hostageByte = 0;
zds->expectedOutBuffer = *output;
ZSTD_FALLTHROUGH;

(File diff suppressed because it is too large)
@@ -33,6 +33,12 @@
*/

/* Streaming state is used to inform allocation of the literal buffer */
typedef enum {
not_streaming = 0,
is_streaming = 1
} streaming_operation;

/* ZSTD_decompressBlock_internal() :
* decompress block, starting at `src`,
* into destination buffer `dst`.

@@ -41,7 +47,7 @@
*/
size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize, const int frame);
const void* src, size_t srcSize, const int frame, const streaming_operation streaming);

/* ZSTD_buildFSETable() :
* generate FSE decoding table for one symbol (ll, ml or off)

@@ -54,7 +60,7 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
*/
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
const short* normalizedCounter, unsigned maxSymbolValue,
const U32* baseValue, const U32* nbAdditionalBits,
const U32* baseValue, const U8* nbAdditionalBits,
unsigned tableLog, void* wksp, size_t wkspSize,
int bmi2);

@@ -20,7 +20,7 @@
* Dependencies
*********************************************************/
#include "../common/mem.h" /* BYTE, U16, U32 */
#include "../common/zstd_internal.h" /* ZSTD_seqSymbol */
#include "../common/zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */

@@ -40,7 +40,7 @@ static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };

static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = {
static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = {
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,

@@ -106,6 +106,22 @@ typedef struct {
size_t ddictPtrCount;
} ZSTD_DDictHashSet;

#ifndef ZSTD_DECODER_INTERNAL_BUFFER
# define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16)
#endif

#define ZSTD_LBMIN 64
#define ZSTD_LBMAX (128 << 10)

/* extra buffer, compensates when dst is not large enough to store litBuffer */
#define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)

typedef enum {
ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */
ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */
ZSTD_split = 2 /* Split between litExtraBuffer and dst */
} ZSTD_litLocation_e;
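The new ZSTD_litLocation_e lets the decoder place decoded literals either in a fixed scratch buffer, directly inside dst, or split across both. A minimal sketch of the kind of placement decision this enables; the thresholds and the rule below are invented for illustration, since the real selection logic lives in the (suppressed) zstd_decompress_block.c diff:

    /* illustrative sketch only: choosing where a block's literals live */
    #include <stddef.h>

    typedef enum { NOT_IN_DST, IN_DST, SPLIT } lit_location;

    static lit_location choose_lit_location(size_t litSize,
                                            size_t dstSpare,      /* dst space past the block output */
                                            size_t extraBufSize)  /* litExtraBuffer capacity */
    {
        if (litSize <= extraBufSize) return NOT_IN_DST; /* small: keep in scratch buffer */
        if (litSize <= dstSpare)     return IN_DST;     /* fits after the output write position */
        return SPLIT;                                   /* tail in scratch, rest in dst */
    }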
struct ZSTD_DCtx_s
{
const ZSTD_seqSymbol* LLTptr;

@@ -136,7 +152,9 @@ struct ZSTD_DCtx_s
size_t litSize;
size_t rleSize;
size_t staticSize;
#if DYNAMIC_BMI2 != 0
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
#endif

/* dictionary */
ZSTD_DDict* ddictLocal;

@@ -158,16 +176,16 @@ struct ZSTD_DCtx_s
size_t outStart;
size_t outEnd;
size_t lhSize;
void* legacyContext;
U32 previousLegacyVersion;
U32 legacyVersion;
U32 hostageByte;
int noForwardProgress;
ZSTD_bufferMode_e outBufferMode;
ZSTD_outBuffer expectedOutBuffer;

/* workspace */
BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
BYTE* litBuffer;
const BYTE* litBufferEnd;
ZSTD_litLocation_e litBufferLocation;
BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];

size_t oversizedDuration;

@@ -180,6 +198,14 @@ struct ZSTD_DCtx_s
/* Tracing */
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */

MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
#if DYNAMIC_BMI2 != 0
return dctx->bmi2;
#else
(void)dctx;
return 0;
#endif
}
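The accessor above exists so call sites can branch once per call between a BMI2 build of the hot loop and the default build, and so the bmi2 field (and the branch) disappear entirely when DYNAMIC_BMI2 is 0. A generic sketch of that call-site pattern, with stand-in types and function names rather than the tree's real decoder entry points:

    /* illustrative sketch only: compile-time-gated runtime dispatch */
    #include <stddef.h>

    #ifndef DYNAMIC_BMI2
    #  define DYNAMIC_BMI2 0
    #endif

    typedef struct {
    #if DYNAMIC_BMI2 != 0
        int bmi2;              /* probed once at context init */
    #endif
        int other_state;
    } ctx_t;

    static int ctx_get_bmi2(const ctx_t* ctx)
    {
    #if DYNAMIC_BMI2 != 0
        return ctx->bmi2;
    #else
        (void)ctx;
        return 0;              /* no runtime dispatch compiled in */
    #endif
    }

    static size_t decode_default(void* dst, size_t cap) { (void)dst; return cap; }
    static size_t decode_bmi2   (void* dst, size_t cap) { (void)dst; return cap; }

    static size_t decode(const ctx_t* ctx, void* dst, size_t cap)
    {
        return ctx_get_bmi2(ctx) ? decode_bmi2(dst, cap)   /* BMI2 build of the hot loop */
                                 : decode_default(dst, cap);
    }

    int main(void)
    {
        ctx_t ctx = { 0 };
        unsigned char buf[8];
        return decode(&ctx, buf, sizeof(buf)) == sizeof(buf) ? 0 : 1;
    }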
/*-*******************************************************
* Shared internal functions

@@ -16,6 +16,12 @@
* decompression.
*/

/*
* Disable the ASM Huffman implementation because we need to
* include all the sources.
*/
#define ZSTD_DISABLE_ASM 1

#include "common/debug.c"
#include "common/entropy_common.c"
#include "common/error_private.c"

@@ -133,7 +133,11 @@ EXPORT_SYMBOL(zstd_init_cstream);
size_t zstd_reset_cstream(zstd_cstream *cstream,
unsigned long long pledged_src_size)
{
return ZSTD_resetCStream(cstream, pledged_src_size);
if (pledged_src_size == 0)
pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN;
ZSTD_FORWARD_IF_ERR( ZSTD_CCtx_reset(cstream, ZSTD_reset_session_only) );
ZSTD_FORWARD_IF_ERR( ZSTD_CCtx_setPledgedSrcSize(cstream, pledged_src_size) );
return 0;
}
EXPORT_SYMBOL(zstd_reset_cstream);