commit d10d7039ab
Stephen Rothwell  2024-12-20 15:11:11 +11:00
58 changed files with 4791 additions and 2596 deletions

View File

@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
 /*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the

View File

@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
 /*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -17,8 +18,17 @@
 /* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */
-#define ZSTDERRORLIB_VISIBILITY
-#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
+#define ZSTDERRORLIB_VISIBLE
+#ifndef ZSTDERRORLIB_HIDDEN
+# if (__GNUC__ >= 4) && !defined(__MINGW32__)
+# define ZSTDERRORLIB_HIDDEN __attribute__ ((visibility ("hidden")))
+# else
+# define ZSTDERRORLIB_HIDDEN
+# endif
+#endif
+#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBLE
 
 /*-*********************************************
  * Error codes list
@@ -43,14 +53,17 @@ typedef enum {
   ZSTD_error_frameParameter_windowTooLarge = 16,
   ZSTD_error_corruption_detected = 20,
   ZSTD_error_checksum_wrong = 22,
+  ZSTD_error_literals_headerWrong = 24,
   ZSTD_error_dictionary_corrupted = 30,
   ZSTD_error_dictionary_wrong = 32,
   ZSTD_error_dictionaryCreation_failed = 34,
   ZSTD_error_parameter_unsupported = 40,
+  ZSTD_error_parameter_combination_unsupported = 41,
   ZSTD_error_parameter_outOfBound = 42,
   ZSTD_error_tableLog_tooLarge = 44,
   ZSTD_error_maxSymbolValue_tooLarge = 46,
   ZSTD_error_maxSymbolValue_tooSmall = 48,
+  ZSTD_error_stabilityCondition_notRespected = 50,
   ZSTD_error_stage_wrong = 60,
   ZSTD_error_init_missing = 62,
   ZSTD_error_memory_allocation = 64,
@@ -58,11 +71,15 @@ typedef enum {
   ZSTD_error_dstSize_tooSmall = 70,
   ZSTD_error_srcSize_wrong = 72,
   ZSTD_error_dstBuffer_null = 74,
+  ZSTD_error_noForwardProgress_destFull = 80,
+  ZSTD_error_noForwardProgress_inputEmpty = 82,
   /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
   ZSTD_error_frameIndex_tooLarge = 100,
   ZSTD_error_seekableIO = 102,
   ZSTD_error_dstBuffer_wrong = 104,
   ZSTD_error_srcBuffer_wrong = 105,
+  ZSTD_error_sequenceProducer_failed = 106,
+  ZSTD_error_externalSequences_invalid = 107,
   ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
 } ZSTD_ErrorCode;

File diff suppressed because it is too large.

View File

@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
 # ################################################################
-# Copyright (c) Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under both the BSD-style license (found in the

View File

@@ -0,0 +1,56 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* This file provides custom allocation primitives
*/
#define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
#include "mem.h" /* MEM_STATIC */
#define ZSTD_STATIC_LINKING_ONLY
#include <linux/zstd.h> /* ZSTD_customMem */
#ifndef ZSTD_ALLOCATIONS_H
#define ZSTD_ALLOCATIONS_H
/* custom memory allocation functions */
MEM_STATIC void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
{
if (customMem.customAlloc)
return customMem.customAlloc(customMem.opaque, size);
return ZSTD_malloc(size);
}
MEM_STATIC void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
{
if (customMem.customAlloc) {
/* calloc implemented as malloc+memset;
* not as efficient as calloc, but next best guess for custom malloc */
void* const ptr = customMem.customAlloc(customMem.opaque, size);
ZSTD_memset(ptr, 0, size);
return ptr;
}
return ZSTD_calloc(1, size);
}
MEM_STATIC void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
{
if (ptr!=NULL) {
if (customMem.customFree)
customMem.customFree(customMem.opaque, ptr);
else
ZSTD_free(ptr);
}
}
#endif /* ZSTD_ALLOCATIONS_H */
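For orientation (commentary, not part of the file): every allocation in the library funnels through these helpers, so a caller can interpose its own allocator via ZSTD_customMem, or pass an all-NULL ZSTD_customMem to fall back to plain ZSTD_malloc/ZSTD_calloc/ZSTD_free. A minimal sketch, assuming only the definitions above:

/* Sketch only: a trivial counting allocator wired through ZSTD_customMem;
 * "opaque" carries user state to the callbacks (unused here). */
static size_t nr_allocs;

static void* counting_alloc(void* opaque, size_t size)
{
    (void)opaque;
    nr_allocs++;
    return ZSTD_malloc(size);
}

static void counting_free(void* opaque, void* address)
{
    (void)opaque;
    ZSTD_free(address);
}

static const ZSTD_customMem counting_mem = { counting_alloc, counting_free, NULL };

/* ZSTD_customMalloc(sz, counting_mem) now routes through counting_alloc();
 * a ZSTD_customMem with customAlloc == NULL falls back to ZSTD_malloc()/ZSTD_calloc(). */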

lib/zstd/common/bits.h (new file, 149 lines)
View File

@@ -0,0 +1,149 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_BITS_H
#define ZSTD_BITS_H
#include "mem.h"
MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
{
assert(val != 0);
{
static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7,
26, 12, 18, 6, 11, 5, 10, 9};
return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27];
}
}
MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
{
assert(val != 0);
# if (__GNUC__ >= 4)
return (unsigned)__builtin_ctz(val);
# else
return ZSTD_countTrailingZeros32_fallback(val);
# endif
}
MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) {
assert(val != 0);
{
static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
11, 14, 16, 18, 22, 25, 3, 30,
8, 12, 20, 28, 15, 17, 24, 7,
19, 27, 23, 6, 26, 5, 4, 31};
val |= val >> 1;
val |= val >> 2;
val |= val >> 4;
val |= val >> 8;
val |= val >> 16;
return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
}
}
MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
{
assert(val != 0);
# if (__GNUC__ >= 4)
return (unsigned)__builtin_clz(val);
# else
return ZSTD_countLeadingZeros32_fallback(val);
# endif
}
MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
{
assert(val != 0);
# if (__GNUC__ >= 4) && defined(__LP64__)
return (unsigned)__builtin_ctzll(val);
# else
{
U32 mostSignificantWord = (U32)(val >> 32);
U32 leastSignificantWord = (U32)val;
if (leastSignificantWord == 0) {
return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
} else {
return ZSTD_countTrailingZeros32(leastSignificantWord);
}
}
# endif
}
MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
{
assert(val != 0);
# if (__GNUC__ >= 4)
return (unsigned)(__builtin_clzll(val));
# else
{
U32 mostSignificantWord = (U32)(val >> 32);
U32 leastSignificantWord = (U32)val;
if (mostSignificantWord == 0) {
return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
} else {
return ZSTD_countLeadingZeros32(mostSignificantWord);
}
}
# endif
}
MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
{
if (MEM_isLittleEndian()) {
if (MEM_64bits()) {
return ZSTD_countTrailingZeros64((U64)val) >> 3;
} else {
return ZSTD_countTrailingZeros32((U32)val) >> 3;
}
} else { /* Big Endian CPU */
if (MEM_64bits()) {
return ZSTD_countLeadingZeros64((U64)val) >> 3;
} else {
return ZSTD_countLeadingZeros32((U32)val) >> 3;
}
}
}
MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
{
assert(val != 0);
return 31 - ZSTD_countLeadingZeros32(val);
}
/* ZSTD_rotateRight_*():
* Rotates a bitfield to the right by "count" bits.
* https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
*/
MEM_STATIC
U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
assert(count < 64);
count &= 0x3F; /* for fickle pattern recognition */
return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
}
MEM_STATIC
U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
assert(count < 32);
count &= 0x1F; /* for fickle pattern recognition */
return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
}
MEM_STATIC
U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
assert(count < 16);
count &= 0x0F; /* for fickle pattern recognition */
return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
}
#endif /* ZSTD_BITS_H */
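A brief aside (not part of the file): the helpers above are thin wrappers around count-trailing/leading-zero primitives, so their relationships can be spot-checked directly. The sketch below uses only functions defined above plus the same assert() the header already relies on; the function name is hypothetical.

/* Illustrative spot checks for the helpers above. */
MEM_STATIC void ZSTD_bits_selftest(void)
{
    assert(ZSTD_countTrailingZeros32(0x8000U) == 15);
    assert(ZSTD_countLeadingZeros32(1U) == 31);
    assert(ZSTD_highbit32(0x1000U) == 12);                 /* 31 - clz(val) */
    assert(ZSTD_countTrailingZeros64((U64)1 << 40) == 40);
    assert(ZSTD_rotateRight_U32(1U, 1) == 0x80000000U);
    /* ZSTD_NbCommonBytes() is normally fed the XOR of two machine words and
     * reports how many low-order (little-endian) bytes matched. */
}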

View File

@@ -1,7 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
 /* ******************************************************************
  * bitstream
  * Part of FSE library
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  *
  * You can contact the author at :
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -27,6 +28,7 @@
 #include "compiler.h" /* UNLIKELY() */
 #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
 #include "error_private.h" /* error codes and messages */
+#include "bits.h" /* ZSTD_highbit32 */
 
 /*=========================================
@ -122,33 +124,6 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
/* faster, but works only if nbBits >= 1 */ /* faster, but works only if nbBits >= 1 */
/*-**************************************************************
* Internal functions
****************************************************************/
MEM_STATIC unsigned BIT_highbit32 (U32 val)
{
assert(val != 0);
{
# if (__GNUC__ >= 3) /* Use GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
11, 14, 16, 18, 22, 25, 3, 30,
8, 12, 20, 28, 15, 17, 24, 7,
19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
# endif
}
}
/*===== Local Constants =====*/ /*===== Local Constants =====*/
static const unsigned BIT_mask[] = { static const unsigned BIT_mask[] = {
0, 1, 3, 7, 0xF, 0x1F, 0, 1, 3, 7, 0xF, 0x1F,
@@ -178,6 +153,12 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
     return 0;
 }
 
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
+{
+    assert(nbBits < BIT_MASK_SIZE);
+    return bitContainer & BIT_mask[nbBits];
+}
+
 /*! BIT_addBits() :
  *  can add up to 31 bits into `bitC`.
  *  Note : does not check for register overflow ! */
@@ -187,7 +168,7 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
     DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
     assert(nbBits < BIT_MASK_SIZE);
     assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
-    bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
+    bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
     bitC->bitPos += nbBits;
 }
@@ -266,7 +247,7 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
         bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
         bitD->bitContainer = MEM_readLEST(bitD->ptr);
         { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
-          bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
+          bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
           if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
     } else {
         bitD->ptr = bitD->start;
@@ -294,7 +275,7 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
         default: break;
         }
         { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
-          bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
+          bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
           if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
         }
         bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
@@ -325,12 +306,6 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c
 #endif
 }
 
-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
-{
-    assert(nbBits < BIT_MASK_SIZE);
-    return bitContainer & BIT_mask[nbBits];
-}
-
 /*! BIT_lookBits() :
  *  Provides next n bits from local register.
  *  local register is not modified.
@@ -377,7 +352,7 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned n
 }
 
 /*! BIT_readBitsFast() :
- *  unsafe version; only works only if nbBits >= 1 */
+ *  unsafe version; only works if nbBits >= 1 */
 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
 {
     size_t const value = BIT_lookBitsFast(bitD, nbBits);
@@ -408,7 +383,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
  *  This function is safe, it guarantees it will not read beyond src buffer.
  * @return : status of `BIT_DStream_t` internal register.
  *           when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
-MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+MEM_STATIC FORCE_INLINE_ATTR BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
 {
     if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
         return BIT_DStream_overflow;

View File

@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
 /*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -179,6 +180,17 @@
  * Sanitizer
  *****************************************************************/
 
+/* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an
+ * abundance of caution, disable our custom poisoning on mingw. */
+#ifdef __MINGW32__
+#ifndef ZSTD_ASAN_DONT_POISON_WORKSPACE
+#define ZSTD_ASAN_DONT_POISON_WORKSPACE 1
+#endif
+#ifndef ZSTD_MSAN_DONT_POISON_WORKSPACE
+#define ZSTD_MSAN_DONT_POISON_WORKSPACE 1
+#endif
+#endif
+
 #endif /* ZSTD_COMPILER_H */

View File

@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
 /*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the

View File

@@ -1,7 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
 /* ******************************************************************
  * debug
  * Part of FSE library
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  *
  * You can contact the author at :
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -21,4 +22,6 @@
 
 #include "debug.h"
 
+#if (DEBUGLEVEL>=2)
 int g_debuglevel = DEBUGLEVEL;
+#endif

View File

@@ -1,7 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
 /* ******************************************************************
  * debug
  * Part of FSE library
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  *
  * You can contact the author at :
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy

View File

@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
 /* ******************************************************************
  * Common functions of New Generation Entropy library
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  *
  * You can contact the author at :
  * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -19,8 +20,8 @@
 #include "error_private.h" /* ERR_*, ERROR */
 #define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
 #include "fse.h"
-#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
 #include "huf.h"
+#include "bits.h" /* ZSTD_highbit32, ZSTD_countTrailingZeros32 */
 
 /*=== Version ===*/
@@ -38,23 +39,6 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
 /*-**************************************************************
 *  FSE NCount encoding-decoding
 ****************************************************************/
-static U32 FSE_ctz(U32 val)
-{
-    assert(val != 0);
-    {
-# if (__GNUC__ >= 3)   /* GCC Intrinsic */
-        return __builtin_ctz(val);
-# else   /* Software version */
-        U32 count = 0;
-        while ((val & 1) == 0) {
-            val >>= 1;
-            ++count;
-        }
-        return count;
-# endif
-    }
-}
-
 FORCE_INLINE_TEMPLATE
 size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
                            const void* headerBuffer, size_t hbSize)
@@ -102,7 +86,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
              * repeat.
              * Avoid UB by setting the high bit to 1.
              */
-            int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
+            int repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
             while (repeats >= 12) {
                 charnum += 3 * 12;
                 if (LIKELY(ip <= iend-7)) {
@@ -113,7 +97,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
                     ip = iend - 4;
                 }
                 bitStream = MEM_readLE32(ip) >> bitCount;
-                repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
+                repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
             }
             charnum += 3 * repeats;
             bitStream >>= 2 * repeats;
@@ -178,7 +162,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
                  * know that threshold > 1.
                  */
                 if (remaining <= 1) break;
-                nbBits = BIT_highbit32(remaining) + 1;
+                nbBits = ZSTD_highbit32(remaining) + 1;
                 threshold = 1 << (nbBits - 1);
             }
             if (charnum >= maxSV1) break;
@@ -253,7 +237,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                      const void* src, size_t srcSize)
 {
     U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
-    return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
+    return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* flags */ 0);
 }
 
 FORCE_INLINE_TEMPLATE size_t
@@ -301,14 +285,14 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
     if (weightTotal == 0) return ERROR(corruption_detected);
 
     /* get last non-null symbol weight (implied, total must be 2^n) */
-    { U32 const tableLog = BIT_highbit32(weightTotal) + 1;
+    { U32 const tableLog = ZSTD_highbit32(weightTotal) + 1;
       if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
       *tableLogPtr = tableLog;
       /* determine last weight */
       { U32 const total = 1 << tableLog;
         U32 const rest = total - weightTotal;
-        U32 const verif = 1 << BIT_highbit32(rest);
-        U32 const lastWeight = BIT_highbit32(rest) + 1;
+        U32 const verif = 1 << ZSTD_highbit32(rest);
+        U32 const lastWeight = ZSTD_highbit32(rest) + 1;
         if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
         huffWeight[oSize] = (BYTE)lastWeight;
         rankStats[lastWeight]++;
@@ -345,13 +329,13 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                           U32* nbSymbolsPtr, U32* tableLogPtr,
                           const void* src, size_t srcSize,
                           void* workSpace, size_t wkspSize,
-                          int bmi2)
+                          int flags)
 {
 #if DYNAMIC_BMI2
-    if (bmi2) {
+    if (flags & HUF_flags_bmi2) {
         return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
     }
 #endif
-    (void)bmi2;
+    (void)flags;
     return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
 }

View File

@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
 /*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -27,9 +28,11 @@ const char* ERR_getErrorString(ERR_enum code)
     case PREFIX(version_unsupported): return "Version not supported";
     case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
     case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
-    case PREFIX(corruption_detected): return "Corrupted block detected";
+    case PREFIX(corruption_detected): return "Data corruption detected";
     case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
+    case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification";
     case PREFIX(parameter_unsupported): return "Unsupported parameter";
+    case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters";
     case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
     case PREFIX(init_missing): return "Context should be init first";
     case PREFIX(memory_allocation): return "Allocation error : not enough memory";
@@ -38,17 +41,22 @@ const char* ERR_getErrorString(ERR_enum code)
     case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
     case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
     case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
+    case PREFIX(stabilityCondition_notRespected): return "pledged buffer stability condition is not respected";
     case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
     case PREFIX(dictionary_wrong): return "Dictionary mismatch";
     case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
     case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
     case PREFIX(srcSize_wrong): return "Src size is incorrect";
     case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer";
+    case PREFIX(noForwardProgress_destFull): return "Operation made no progress over multiple calls, due to output buffer being full";
+    case PREFIX(noForwardProgress_inputEmpty): return "Operation made no progress over multiple calls, due to input being empty";
     /* following error codes are not stable and may be removed or changed in a future version */
     case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
     case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
     case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
     case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
+    case PREFIX(sequenceProducer_failed): return "Block-level external sequence producer returned an error code";
+    case PREFIX(externalSequences_invalid): return "External sequences are not valid";
     case PREFIX(maxCode):
     default: return notErrorCode;
     }

View File

@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
 /*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the

View File

@@ -1,7 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
 /* ******************************************************************
  * FSE : Finite State Entropy codec
  * Public Prototypes declaration
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  *
  * You can contact the author at :
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -50,34 +51,6 @@
 FSE_PUBLIC_API unsigned FSE_versionNumber(void); /*< library version number; to be used when checking dll version */
 
-/*-****************************************
-*  FSE simple functions
-******************************************/
-/*! FSE_compress() :
-    Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
-    'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
-    @return : size of compressed data (<= dstCapacity).
-    Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
-                     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
-                     if FSE_isError(return), compression failed (more details using FSE_getErrorName())
-*/
-FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
-                             const void* src, size_t srcSize);
-
-/*! FSE_decompress():
-    Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
-    into already allocated destination buffer 'dst', of size 'dstCapacity'.
-    @return : size of regenerated data (<= maxDstSize),
-              or an error code, which can be tested using FSE_isError() .
-
-    ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
-    Why ? : making this distinction requires a header.
-    Header management is intentionally delegated to the user layer, which can better manage special cases.
-*/
-FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity,
-                               const void* cSrc, size_t cSrcSize);
-
 /*-*****************************************
 *  Tool functions
 ******************************************/
@@ -88,20 +61,6 @@ FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return
 FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
 
-/*-*****************************************
-*  FSE advanced functions
-******************************************/
-/*! FSE_compress2() :
-    Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
-    Both parameters can be defined as '0' to mean : use default value
-    @return : size of compressed data
-    Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
-                     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
-                     if FSE_isError(return), it's an error code.
-*/
-FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
-
 /*-*****************************************
 *  FSE detailed API
 ******************************************/
@@ -161,8 +120,6 @@ FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
 /*! Constructor and Destructor of FSE_CTable.
     Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
 typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
-FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
-FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct);
 
 /*! FSE_buildCTable():
     Builds `ct`, which must be already allocated, using FSE_createCTable().
@@ -238,23 +195,7 @@ FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
                            unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
                            const void* rBuffer, size_t rBuffSize, int bmi2);
 
-/*! Constructor and Destructor of FSE_DTable.
-    Note that its size depends on 'tableLog' */
 typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
-FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
-FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt);
-
-/*! FSE_buildDTable():
-    Builds 'dt', which must be already allocated, using FSE_createDTable().
-    return : 0, or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
-
-/*! FSE_decompress_usingDTable():
-    Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
-    into `dst` which must be already allocated.
-    @return : size of regenerated data (necessarily <= `dstCapacity`),
-              or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
-
 /*!
 Tutorial :
@@ -317,16 +258,6 @@ If there is an error, the function will return an error code, which can be teste
 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
 /*< same as FSE_optimalTableLog(), which used `minus==2` */
 
-/* FSE_compress_wksp() :
- * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
- * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
- */
-#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
-size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
-
-size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
-/*< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
-
 size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
 /*< build a fake FSE_CTable, designed to compress always the same symbolValue */
@@ -344,19 +275,11 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
 FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
 /*< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */
 
-size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
-/*< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
-
-size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
-/*< build a fake FSE_DTable, designed to always generate the same symbolValue */
-
-#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
+#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
 #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
-size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
-/*< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
 size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
-/*< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */
+/*< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`.
+ *  Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */
 
 typedef enum {
    FSE_repeat_none, /*< Cannot use the previous table */
@@ -552,7 +475,7 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt
 
 /* FSE_getMaxNbBits() :
  * Approximate maximum cost of a symbol, in bits.
- * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
+ * Fractional get rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
  * note 1 : assume symbolValue is valid (<= maxSymbolValue)
  * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
 MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)

View File

@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
 /* ******************************************************************
  * FSE : Finite State Entropy decoder
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  *
  * You can contact the author at :
  * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -24,6 +25,7 @@
 #include "error_private.h"
 #define ZSTD_DEPS_NEED_MALLOC
 #include "zstd_deps.h"
+#include "bits.h" /* ZSTD_highbit32 */
 
 /* **************************************************************
@@ -55,19 +57,6 @@
 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
 
-/* Function templates */
-FSE_DTable* FSE_createDTable (unsigned tableLog)
-{
-    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
-    return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
-}
-
-void FSE_freeDTable (FSE_DTable* dt)
-{
-    ZSTD_free(dt);
-}
-
 static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
 {
     void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
@@ -127,10 +116,10 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
         }
     }
     /* Now we spread those positions across the table.
-     * The benefit of doing it in two stages is that we avoid the the
+     * The benefit of doing it in two stages is that we avoid the
      * variable size inner loop, which caused lots of branch misses.
      * Now we can run through all the positions without any branch misses.
-     * We unroll the loop twice, since that is what emperically worked best.
+     * We unroll the loop twice, since that is what empirically worked best.
      */
     {
         size_t position = 0;
@@ -166,7 +155,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
         for (u=0; u<tableSize; u++) {
             FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
             U32 const nextState = symbolNext[symbol]++;
-            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
+            tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
             tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
     }   }
@@ -184,49 +173,6 @@ size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsi
 /*-*******************************************************
 *  Decompression (Byte symbols)
 *********************************************************/
-
-size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
-{
-    void* ptr = dt;
-    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
-    void* dPtr = dt + 1;
-    FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
-
-    DTableH->tableLog = 0;
-    DTableH->fastMode = 0;
-
-    cell->newState = 0;
-    cell->symbol = symbolValue;
-    cell->nbBits = 0;
-
-    return 0;
-}
-
-size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
-{
-    void* ptr = dt;
-    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
-    void* dPtr = dt + 1;
-    FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
-    const unsigned tableSize = 1 << nbBits;
-    const unsigned tableMask = tableSize - 1;
-    const unsigned maxSV1 = tableMask+1;
-    unsigned s;
-
-    /* Sanity checks */
-    if (nbBits < 1) return ERROR(GENERIC); /* min size */
-
-    /* Build Decoding Table */
-    DTableH->tableLog = (U16)nbBits;
-    DTableH->fastMode = 1;
-    for (s=0; s<maxSV1; s++) {
-        dinfo[s].newState = 0;
-        dinfo[s].symbol = (BYTE)s;
-        dinfo[s].nbBits = (BYTE)nbBits;
-    }
-
-    return 0;
-}
-
 FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
           void* dst, size_t maxDstSize,
@@ -290,26 +236,6 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
     return op-ostart;
 }
 
-size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
-                            const void* cSrc, size_t cSrcSize,
-                            const FSE_DTable* dt)
-{
-    const void* ptr = dt;
-    const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
-    const U32 fastMode = DTableH->fastMode;
-
-    /* select fast mode (static) */
-    if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
-    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
-}
-
-size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
-{
-    return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
-}
-
 typedef struct {
     short ncount[FSE_MAX_SYMBOL_VALUE + 1];
     FSE_DTable dtable[]; /* Dynamically sized */
@@ -342,7 +268,8 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
     }
 
     if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
-    workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog);
+    assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize);
+    workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
     wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
 
     CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
@@ -382,9 +309,4 @@ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc,
     return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
 }
 
-typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
-
 #endif /* FSE_COMMONDEFS_ONLY */

View File

@@ -1,7 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
 /* ******************************************************************
  * huff0 huffman codec,
  * part of Finite State Entropy library
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  *
  * You can contact the author at :
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -18,100 +19,23 @@
 /* *** Dependencies *** */
 #include "zstd_deps.h" /* size_t */
+#include "mem.h" /* U32 */
-
-/* *** library symbols visibility *** */
-/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
- *        HUF symbols remain "private" (internal symbols for library only).
- *        Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
-#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
-# define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
-#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */
-# define HUF_PUBLIC_API __declspec(dllexport)
-#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
-# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
-#else
-# define HUF_PUBLIC_API
-#endif
-
-/* ========================== */
-/* ***  simple functions  *** */
-/* ========================== */
-
-/* HUF_compress() :
- *  Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
- *  'dst' buffer must be already allocated.
- *  Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
- *  `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
- * @return : size of compressed data (<= `dstCapacity`).
- *  Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
- *                   if HUF_isError(return), compression failed (more details using HUF_getErrorName())
- */
-HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
-                             const void* src, size_t srcSize);
-
-/* HUF_decompress() :
- *  Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
- *  into already allocated buffer 'dst', of minimum size 'dstSize'.
- *  `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
- *  Note : in contrast with FSE, HUF_decompress can regenerate
- *         RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
- *         because it knows size to regenerate (originalSize).
- * @return : size of regenerated data (== originalSize),
- *           or an error code, which can be tested using HUF_isError()
- */
-HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize,
-                               const void* cSrc, size_t cSrcSize);
-
-/* ***   Tool functions *** */
-#define HUF_BLOCKSIZE_MAX (128 * 1024) /*< maximum input size for a single block compressed with HUF_compress */
-HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /*< maximum compressed size (worst case) */
-
-/* Error Management */
-HUF_PUBLIC_API unsigned HUF_isError(size_t code); /*< tells if a return value is an error code */
-HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /*< provides error code string (useful for debugging) */
-
-/* ***   Advanced function   *** */
-
-/* HUF_compress2() :
- *  Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
- *  `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
- *  `tableLog` must be `<= HUF_TABLELOG_MAX` . */
-HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
-                               const void* src, size_t srcSize,
-                               unsigned maxSymbolValue, unsigned tableLog);
-
-/* HUF_compress4X_wksp() :
- *  Same as HUF_compress2(), but uses externally allocated `workSpace`.
- *  `workspace` must be at least as large as HUF_WORKSPACE_SIZE */
-#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
-#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
-HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
-                                     const void* src, size_t srcSize,
-                                     unsigned maxSymbolValue, unsigned tableLog,
-                                     void* workSpace, size_t wkspSize);
-
-#endif   /* HUF_H_298734234 */
-
-/* ******************************************************************
- *  WARNING !!
- *  The following section contains advanced and experimental definitions
- *  which shall never be used in the context of a dynamic library,
- *  because they are not guaranteed to remain stable in the future.
- *  Only consider them in association with static linking.
- * *****************************************************************/
-#if !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
-#define HUF_H_HUF_STATIC_LINKING_ONLY
-
-/* *** Dependencies *** */
-#include "mem.h" /* U32 */
 #define FSE_STATIC_LINKING_ONLY
 #include "fse.h"
 
+/* ***   Tool functions *** */
+#define HUF_BLOCKSIZE_MAX (128 * 1024) /*< maximum input size for a single block compressed with HUF_compress */
+size_t HUF_compressBound(size_t size); /*< maximum compressed size (worst case) */
+
+/* Error Management */
+unsigned HUF_isError(size_t code); /*< tells if a return value is an error code */
+const char* HUF_getErrorName(size_t code); /*< provides error code string (useful for debugging) */
+
+#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
+#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
 
 /* *** Constants *** */
 #define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
 #define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */
@ -151,25 +75,49 @@ typedef U32 HUF_DTable;
/* **************************************** /* ****************************************
* Advanced decompression functions * Advanced decompression functions
******************************************/ ******************************************/
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */
#endif
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< decodes RLE and uncompressed */ /*
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< considers RLE and uncompressed as errors */ * Huffman flags bitset.
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< considers RLE and uncompressed as errors */ * For all flags, 0 is the default value.
size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */ */
size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< single-symbol decoder */ typedef enum {
#ifndef HUF_FORCE_DECOMPRESS_X1 /*
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */ * If compiled with DYNAMIC_BMI2: Set flag only if the CPU supports BMI2 at runtime.
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< double-symbols decoder */ * Otherwise: Ignored.
#endif */
HUF_flags_bmi2 = (1 << 0),
/*
* If set: Test possible table depths to find the one that produces the smallest header + encoded size.
* If unset: Use heuristic to find the table depth.
*/
HUF_flags_optimalDepth = (1 << 1),
/*
* If set: If the previous table can encode the input, always reuse the previous table.
* If unset: If the previous table can encode the input, reuse the previous table if it results in a smaller output.
*/
HUF_flags_preferRepeat = (1 << 2),
/*
* If set: Sample the input and check if the sample is uncompressible, if it is then don't attempt to compress.
* If unset: Always histogram the entire input.
*/
HUF_flags_suspectUncompressible = (1 << 3),
/*
* If set: Don't use assembly implementations
* If unset: Allow using assembly implementations
*/
HUF_flags_disableAsm = (1 << 4),
/*
* If set: Don't use the fast decoding loop, always use the fallback decoding loop.
* If unset: Use the fast decoding loop when possible.
*/
HUF_flags_disableFast = (1 << 5)
} HUF_flags_e;
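Editor's note, to make the bitset concrete: the values above are combined with bitwise OR and passed as the single int flags argument of the new-style entry points. The fragment below is an illustrative sketch only (not part of this patch); dst, dstCapacity, src, srcSize and CTable are assumed caller-provided, and the BMI2 runtime check is whatever mechanism the caller already has.
    int flags = 0;
    if (caller_cpu_has_bmi2)            /* hypothetical runtime check, not provided by this patch */
        flags |= HUF_flags_bmi2;
    flags |= HUF_flags_optimalDepth;    /* search table depths instead of relying on the heuristic */
    {   size_t const cSize = HUF_compress4X_usingCTable(dst, dstCapacity, src, srcSize, CTable, flags);
        if (HUF_isError(cSize)) { /* handle error */ }
    }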
/* **************************************** /* ****************************************
* HUF detailed API * HUF detailed API
* ****************************************/ * ****************************************/
#define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra
/*! HUF_compress() does the following: /*! HUF_compress() does the following:
* 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
@ -182,12 +130,12 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
* For example, it's possible to compress several blocks using the same 'CTable', * For example, it's possible to compress several blocks using the same 'CTable',
* or to save and regenerate 'CTable' using external methods. * or to save and regenerate 'CTable' using external methods.
*/ */
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); unsigned HUF_minTableLog(unsigned symbolCardinality);
size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue);
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace,
size_t wkspSize, HUF_CElt* table, const unsigned* count, int flags); /* table is used as scratch space for building and testing tables, not a return value */
size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize); size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags);
size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
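Editor's note, a hedged sketch of the decomposed flow described above (illustrative only, not part of the patch): count[], CTable and the output cursor op are assumed caller-provided, wksp is a scratch array of at least HUF_WORKSPACE_SIZE bytes, and error checks via HUF_isError() are omitted for brevity.
    /* 1. histogram the input (HIST_count_wksp() from hist.h is the usual helper) */
    /* 2. build the Huffman CTable in caller scratch space */
    size_t const maxBits = HUF_buildCTable_wksp(CTable, count, 255 /*maxSymbolValue*/,
                                                HUF_TABLELOG_DEFAULT, wksp, sizeof(wksp));
    /* 3. write the table description, then 4. encode the payload with that same CTable */
    size_t const hSize = HUF_writeCTable_wksp(op, dstCapacity, CTable, 255, (unsigned)maxBits,
                                              wksp, sizeof(wksp));
    size_t const cSize = HUF_compress4X_usingCTable(op + hSize, dstCapacity - hSize,
                                                    src, srcSize, CTable, /*flags*/ 0);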
@ -196,6 +144,7 @@ typedef enum {
HUF_repeat_check, /*< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ HUF_repeat_check, /*< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
HUF_repeat_valid /*< Can use the previous table and it is assumed to be valid */ HUF_repeat_valid /*< Can use the previous table and it is assumed to be valid */
} HUF_repeat; } HUF_repeat;
/* HUF_compress4X_repeat() : /* HUF_compress4X_repeat() :
* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
* If it uses hufTable it does not modify hufTable or repeat. * If it uses hufTable it does not modify hufTable or repeat.
@ -206,13 +155,13 @@ size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
const void* src, size_t srcSize, const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog, unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible); HUF_CElt* hufTable, HUF_repeat* repeat, int flags);
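Editor's note, a minimal sketch of the repeat-table protocol assuming the post-patch flags-based signature (illustrative only; hufTable is a caller-owned CTable carried across blocks, wksp and the buffers are likewise assumed):
    HUF_repeat repeat = HUF_repeat_none;   /* becomes HUF_repeat_valid once hufTable holds a usable table */
    size_t const cSize = HUF_compress4X_repeat(dst, dstCapacity, src, srcSize,
                                               255 /*maxSymbolValue*/, HUF_TABLELOG_DEFAULT,
                                               wksp, sizeof(wksp),
                                               hufTable, &repeat, HUF_flags_preferRepeat);
    /* cSize == 0 means the block was not compressible; hufTable and repeat describe the table to reuse next time */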
/* HUF_buildCTable_wksp() : /* HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer. * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
* `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
*/ */
#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) #define HUF_CTABLE_WORKSPACE_SIZE_U32 ((4 * (HUF_SYMBOLVALUE_MAX + 1)) + 192)
#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) #define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
size_t HUF_buildCTable_wksp (HUF_CElt* tree, size_t HUF_buildCTable_wksp (HUF_CElt* tree,
const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
@ -238,7 +187,7 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize, const void* src, size_t srcSize,
void* workspace, size_t wkspSize, void* workspace, size_t wkspSize,
int bmi2); int flags);
/* HUF_readCTable() : /* HUF_readCTable() :
* Loading a CTable saved with HUF_writeCTable() */ * Loading a CTable saved with HUF_writeCTable() */
@ -276,32 +225,12 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9)) #define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))
#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
#endif
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#endif
/* ====================== */ /* ====================== */
/* single stream variants */ /* single stream variants */
/* ====================== */ /* ====================== */
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags);
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
/* HUF_compress1X_repeat() : /* HUF_compress1X_repeat() :
* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
* If it uses hufTable it does not modify hufTable or repeat. * If it uses hufTable it does not modify hufTable or repeat.
@ -312,47 +241,28 @@ size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
const void* src, size_t srcSize, const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog, unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible); HUF_CElt* hufTable, HUF_repeat* repeat, int flags);
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
#ifndef HUF_FORCE_DECOMPRESS_X1 #ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); /*< double-symbols decoder */
#endif
size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< single-symbol decoder */
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< double-symbols decoder */
#endif
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /*< automatic selection of sing or double symbol decoder, based on DTable */
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#endif #endif
/* BMI2 variants. /* BMI2 variants.
* If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
*/ */
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);
#ifndef HUF_FORCE_DECOMPRESS_X2 #ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
#endif #endif
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
#ifndef HUF_FORCE_DECOMPRESS_X2 #ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);
#endif #endif
#ifndef HUF_FORCE_DECOMPRESS_X1 #ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);
#endif #endif
#endif /* HUF_STATIC_LINKING_ONLY */ #endif /* HUF_H_298734234 */
View File
@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
View File
@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -12,7 +13,7 @@
#define ZSTD_PORTABILITY_MACROS_H #define ZSTD_PORTABILITY_MACROS_H
/* /*
* This header file contains macro defintions to support portability. * This header file contains macro definitions to support portability.
* This header is shared between C and ASM code, so it MUST only * This header is shared between C and ASM code, so it MUST only
* contain macro definitions. It MUST not contain any C code. * contain macro definitions. It MUST not contain any C code.
* *
@ -65,7 +66,7 @@
#endif #endif
/* /*
* Only enable assembly for GNUC comptabile compilers, * Only enable assembly for GNUC compatible compilers,
* because other platforms may not support GAS assembly syntax. * because other platforms may not support GAS assembly syntax.
* *
* Only enable assembly for Linux / MacOS, other platforms may * Only enable assembly for Linux / MacOS, other platforms may
@ -90,4 +91,23 @@
*/ */
#define ZSTD_ENABLE_ASM_X86_64_BMI2 0 #define ZSTD_ENABLE_ASM_X86_64_BMI2 0
/*
* For x86 ELF targets, add .note.gnu.property section for Intel CET in
* assembly sources when CET is enabled.
*
* Additionally, any function that may be called indirectly must begin
* with ZSTD_CET_ENDBRANCH.
*/
#if defined(__ELF__) && (defined(__x86_64__) || defined(__i386__)) \
&& defined(__has_include)
# if __has_include(<cet.h>)
# include <cet.h>
# define ZSTD_CET_ENDBRANCH _CET_ENDBR
# endif
#endif
#ifndef ZSTD_CET_ENDBRANCH
# define ZSTD_CET_ENDBRANCH
#endif
#endif /* ZSTD_PORTABILITY_MACROS_H */ #endif /* ZSTD_PORTABILITY_MACROS_H */

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -14,7 +15,6 @@
* Dependencies * Dependencies
***************************************/ ***************************************/
#define ZSTD_DEPS_NEED_MALLOC #define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
#include "error_private.h" #include "error_private.h"
#include "zstd_internal.h" #include "zstd_internal.h"
@ -47,37 +47,3 @@ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
/*! ZSTD_getErrorString() : /*! ZSTD_getErrorString() :
* provides error code string from enum */ * provides error code string from enum */
const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
/*=**************************************************************
* Custom allocator
****************************************************************/
void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
{
if (customMem.customAlloc)
return customMem.customAlloc(customMem.opaque, size);
return ZSTD_malloc(size);
}
void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
{
if (customMem.customAlloc) {
/* calloc implemented as malloc+memset;
* not as efficient as calloc, but next best guess for custom malloc */
void* const ptr = customMem.customAlloc(customMem.opaque, size);
ZSTD_memset(ptr, 0, size);
return ptr;
}
return ZSTD_calloc(1, size);
}
void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
{
if (ptr!=NULL) {
if (customMem.customFree)
customMem.customFree(customMem.opaque, ptr);
else
ZSTD_free(ptr);
}
}
View File
@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -105,3 +105,17 @@ static uint64_t ZSTD_div64(uint64_t dividend, uint32_t divisor) {
#endif /* ZSTD_DEPS_IO */ #endif /* ZSTD_DEPS_IO */
#endif /* ZSTD_DEPS_NEED_IO */ #endif /* ZSTD_DEPS_NEED_IO */
/*
* Only requested when MSAN is enabled.
* Need:
* intptr_t
*/
#ifdef ZSTD_DEPS_NEED_STDINT
#ifndef ZSTD_DEPS_STDINT
#define ZSTD_DEPS_STDINT
/* intptr_t already provided by ZSTD_DEPS_COMMON */
#endif /* ZSTD_DEPS_STDINT */
#endif /* ZSTD_DEPS_NEED_STDINT */
View File
@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -28,7 +29,6 @@
#include <linux/zstd.h> #include <linux/zstd.h>
#define FSE_STATIC_LINKING_ONLY #define FSE_STATIC_LINKING_ONLY
#include "fse.h" #include "fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "huf.h" #include "huf.h"
#include <linux/xxhash.h> /* XXH_reset, update, digest */ #include <linux/xxhash.h> /* XXH_reset, update, digest */
#define ZSTD_TRACE 0 #define ZSTD_TRACE 0
@ -83,9 +83,9 @@ typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
#define ZSTD_FRAMECHECKSUMSIZE 4 #define ZSTD_FRAMECHECKSUMSIZE 4
#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */
#define MIN_LITERALS_FOR_4_STREAMS 6
#define HufLog 12
typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
#define LONGNBSEQ 0x7F00 #define LONGNBSEQ 0x7F00
@ -93,6 +93,7 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
#define MINMATCH 3 #define MINMATCH 3
#define Litbits 8 #define Litbits 8
#define LitHufLog 11
#define MaxLit ((1<<Litbits) - 1) #define MaxLit ((1<<Litbits) - 1)
#define MaxML 52 #define MaxML 52
#define MaxLL 35 #define MaxLL 35
@ -103,6 +104,8 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
#define LLFSELog 9 #define LLFSELog 9
#define OffFSELog 8 #define OffFSELog 8
#define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog) #define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
#define MaxMLBits 16
#define MaxLLBits 16
#define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */ #define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */
/* Each table cannot take more than #symbols * FSELog bits */ /* Each table cannot take more than #symbols * FSELog bits */
@ -225,12 +228,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
* one COPY16() in the first call. Then, do two calls per loop since * one COPY16() in the first call. Then, do two calls per loop since
* at that point it is more likely to have a high trip count. * at that point it is more likely to have a high trip count.
*/ */
#ifdef __aarch64__
do {
COPY16(op, ip);
}
while (op < oend);
#else
ZSTD_copy16(op, ip); ZSTD_copy16(op, ip);
if (16 >= length) return; if (16 >= length) return;
op += 16; op += 16;
@ -240,7 +237,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
COPY16(op, ip); COPY16(op, ip);
} }
while (op < oend); while (op < oend);
#endif
} }
} }
@ -289,11 +285,11 @@ typedef enum {
typedef struct { typedef struct {
seqDef* sequencesStart; seqDef* sequencesStart;
seqDef* sequences; /* ptr to end of sequences */ seqDef* sequences; /* ptr to end of sequences */
BYTE* litStart; BYTE* litStart;
BYTE* lit; /* ptr to end of literals */ BYTE* lit; /* ptr to end of literals */
BYTE* llCode; BYTE* llCode;
BYTE* mlCode; BYTE* mlCode;
BYTE* ofCode; BYTE* ofCode;
size_t maxNbSeq; size_t maxNbSeq;
size_t maxNbLit; size_t maxNbLit;
@ -301,8 +297,8 @@ typedef struct {
* in the seqStore that has a value larger than U16 (if it exists). To do so, we increment * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
* the existing value of the litLength or matchLength by 0x10000. * the existing value of the litLength or matchLength by 0x10000.
*/ */
ZSTD_longLengthType_e longLengthType; ZSTD_longLengthType_e longLengthType;
U32 longLengthPos; /* Index of the sequence to apply long length modification to */ U32 longLengthPos; /* Index of the sequence to apply long length modification to */
} seqStore_t; } seqStore_t;
typedef struct { typedef struct {
@ -321,10 +317,10 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore
seqLen.matchLength = seq->mlBase + MINMATCH; seqLen.matchLength = seq->mlBase + MINMATCH;
if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
if (seqStore->longLengthType == ZSTD_llt_literalLength) { if (seqStore->longLengthType == ZSTD_llt_literalLength) {
seqLen.litLength += 0xFFFF; seqLen.litLength += 0x10000;
} }
if (seqStore->longLengthType == ZSTD_llt_matchLength) { if (seqStore->longLengthType == ZSTD_llt_matchLength) {
seqLen.matchLength += 0xFFFF; seqLen.matchLength += 0x10000;
} }
} }
return seqLen; return seqLen;
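Editor's note on the constant corrected above (illustrative, not part of the patch): litLength and mlBase are stored as U16, so a length that does not fit in 16 bits is recorded modulo 0x10000 and flagged through longLengthType/longLengthPos; the reader restores it by adding back 0x10000. For example, a 70000-byte literal run is stored as (U16)70000 == 4464 and reconstructed as 4464 + 0x10000 == 70000, which is why the previous +0xFFFF was off by one.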
@ -337,72 +333,13 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore
* `decompressedBound != ZSTD_CONTENTSIZE_ERROR` * `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
*/ */
typedef struct { typedef struct {
size_t nbBlocks;
size_t compressedSize; size_t compressedSize;
unsigned long long decompressedBound; unsigned long long decompressedBound;
} ZSTD_frameSizeInfo; /* decompress & legacy */ } ZSTD_frameSizeInfo; /* decompress & legacy */
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ int ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
/* custom memory allocation functions */
void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem);
void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem);
void ZSTD_customFree(void* ptr, ZSTD_customMem customMem);
MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
{
assert(val != 0);
{
# if (__GNUC__ >= 3) /* GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# else /* Software version */
static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
# endif
}
}
/*
* Counts the number of trailing zeros of a `size_t`.
* Most compilers should support CTZ as a builtin. A backup
* implementation is provided if the builtin isn't supported, but
* it may not be terribly efficient.
*/
MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val)
{
if (MEM_64bits()) {
# if (__GNUC__ >= 4)
return __builtin_ctzll((U64)val);
# else
static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19,
4, 25, 14, 28, 9, 34, 20, 56,
5, 17, 26, 54, 15, 41, 29, 43,
10, 31, 38, 35, 21, 45, 49, 57,
63, 6, 12, 18, 24, 27, 33, 55,
16, 53, 40, 42, 30, 37, 44, 48,
62, 11, 23, 32, 52, 39, 36, 47,
61, 22, 51, 46, 60, 50, 59, 58 };
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
# endif
} else { /* 32 bits */
# if (__GNUC__ >= 3)
return __builtin_ctz((U32)val);
# else
static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3,
30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7,
26, 12, 18, 6, 11, 5, 10, 9 };
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
# endif
}
}
/* ZSTD_invalidateRepCodes() : /* ZSTD_invalidateRepCodes() :
View File
@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
View File
@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* ****************************************************************** /* ******************************************************************
* FSE : Finite State Entropy encoder * FSE : Finite State Entropy encoder
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* *
* You can contact the author at : * You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -26,6 +27,7 @@
#define ZSTD_DEPS_NEED_MALLOC #define ZSTD_DEPS_NEED_MALLOC
#define ZSTD_DEPS_NEED_MATH64 #define ZSTD_DEPS_NEED_MATH64
#include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */ #include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
#include "../common/bits.h" /* ZSTD_highbit32 */
/* ************************************************************** /* **************************************************************
@ -90,7 +92,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
assert(tableLog < 16); /* required for threshold strategy to work */ assert(tableLog < 16); /* required for threshold strategy to work */
/* For explanations on how to distribute symbol values over the table : /* For explanations on how to distribute symbol values over the table :
* http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ * https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
#ifdef __clang_analyzer__ #ifdef __clang_analyzer__
ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
@ -191,7 +193,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
break; break;
default : default :
assert(normalizedCounter[s] > 1); assert(normalizedCounter[s] > 1);
{ U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1); { U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1);
U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut; U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]); symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
@ -342,21 +344,11 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize,
* FSE Compression Code * FSE Compression Code
****************************************************************/ ****************************************************************/
FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
{
size_t size;
if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
return (FSE_CTable*)ZSTD_malloc(size);
}
void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }
/* provides the minimum logSize to safely represent a distribution */ /* provides the minimum logSize to safely represent a distribution */
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
{ {
U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1; U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1;
U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2;
U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
assert(srcSize > 1); /* Not supported, RLE should be used instead */ assert(srcSize > 1); /* Not supported, RLE should be used instead */
return minBits; return minBits;
@ -364,7 +356,7 @@ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
{ {
U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus;
U32 tableLog = maxTableLog; U32 tableLog = maxTableLog;
U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
assert(srcSize > 1); /* Not supported, RLE should be used instead */ assert(srcSize > 1); /* Not supported, RLE should be used instead */
@ -532,40 +524,6 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
return tableLog; return tableLog;
} }
/* fake FSE_CTable, for raw (uncompressed) input */
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
{
const unsigned tableSize = 1 << nbBits;
const unsigned tableMask = tableSize - 1;
const unsigned maxSymbolValue = tableMask;
void* const ptr = ct;
U16* const tableU16 = ( (U16*) ptr) + 2;
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
unsigned s;
/* Sanity checks */
if (nbBits < 1) return ERROR(GENERIC); /* min size */
/* header */
tableU16[-2] = (U16) nbBits;
tableU16[-1] = (U16) maxSymbolValue;
/* Build table */
for (s=0; s<tableSize; s++)
tableU16[s] = (U16)(tableSize + s);
/* Build Symbol Transformation Table */
{ const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
for (s=0; s<=maxSymbolValue; s++) {
symbolTT[s].deltaNbBits = deltaNbBits;
symbolTT[s].deltaFindState = s-1;
} }
return 0;
}
/* fake FSE_CTable, for rle input (always same symbol) */ /* fake FSE_CTable, for rle input (always same symbol) */
size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue) size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
{ {
@ -664,5 +622,4 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
#endif /* FSE_COMMONDEFS_ONLY */ #endif /* FSE_COMMONDEFS_ONLY */
View File
@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* ****************************************************************** /* ******************************************************************
* hist : Histogram functions * hist : Histogram functions
* part of Finite State Entropy project * part of Finite State Entropy project
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* *
* You can contact the author at : * You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
View File
@ -1,7 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* ****************************************************************** /* ******************************************************************
* hist : Histogram functions * hist : Histogram functions
* part of Finite State Entropy project * part of Finite State Entropy project
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* *
* You can contact the author at : * You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
View File
@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* ****************************************************************** /* ******************************************************************
* Huffman encoder, part of New Generation Entropy library * Huffman encoder, part of New Generation Entropy library
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* *
* You can contact the author at : * You can contact the author at :
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -26,9 +27,9 @@
#include "hist.h" #include "hist.h"
#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
#include "../common/fse.h" /* header compression */ #include "../common/fse.h" /* header compression */
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h" #include "../common/huf.h"
#include "../common/error_private.h" #include "../common/error_private.h"
#include "../common/bits.h" /* ZSTD_highbit32 */
/* ************************************************************** /* **************************************************************
@ -39,13 +40,67 @@
/* ************************************************************** /* **************************************************************
* Utils * Required declarations
****************************************************************/ ****************************************************************/
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) typedef struct nodeElt_s {
U32 count;
U16 parent;
BYTE byte;
BYTE nbBits;
} nodeElt;
/* **************************************************************
* Debug Traces
****************************************************************/
#if DEBUGLEVEL >= 2
static size_t showU32(const U32* arr, size_t size)
{ {
return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); size_t u;
for (u=0; u<size; u++) {
RAWLOG(6, " %u", arr[u]); (void)arr;
}
RAWLOG(6, " \n");
return size;
} }
static size_t HUF_getNbBits(HUF_CElt elt);
static size_t showCTableBits(const HUF_CElt* ctable, size_t size)
{
size_t u;
for (u=0; u<size; u++) {
RAWLOG(6, " %zu", HUF_getNbBits(ctable[u])); (void)ctable;
}
RAWLOG(6, " \n");
return size;
}
static size_t showHNodeSymbols(const nodeElt* hnode, size_t size)
{
size_t u;
for (u=0; u<size; u++) {
RAWLOG(6, " %u", hnode[u].byte); (void)hnode;
}
RAWLOG(6, " \n");
return size;
}
static size_t showHNodeBits(const nodeElt* hnode, size_t size)
{
size_t u;
for (u=0; u<size; u++) {
RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode;
}
RAWLOG(6, " \n");
return size;
}
#endif
/* ******************************************************* /* *******************************************************
* HUF : Huffman block compression * HUF : Huffman block compression
@ -86,7 +141,10 @@ typedef struct {
S16 norm[HUF_TABLELOG_MAX+1]; S16 norm[HUF_TABLELOG_MAX+1];
} HUF_CompressWeightsWksp; } HUF_CompressWeightsWksp;
static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize) static size_t
HUF_compressWeights(void* dst, size_t dstSize,
const void* weightTable, size_t wtSize,
void* workspace, size_t workspaceSize)
{ {
BYTE* const ostart = (BYTE*) dst; BYTE* const ostart = (BYTE*) dst;
BYTE* op = ostart; BYTE* op = ostart;
@ -137,7 +195,7 @@ static size_t HUF_getNbBitsFast(HUF_CElt elt)
static size_t HUF_getValue(HUF_CElt elt) static size_t HUF_getValue(HUF_CElt elt)
{ {
return elt & ~0xFF; return elt & ~(size_t)0xFF;
} }
static size_t HUF_getValueFast(HUF_CElt elt) static size_t HUF_getValueFast(HUF_CElt elt)
@ -175,6 +233,8 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
U32 n; U32 n;
HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32)); HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));
/* check conditions */ /* check conditions */
if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC); if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
@ -204,16 +264,6 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
return ((maxSymbolValue+1)/2) + 1; return ((maxSymbolValue+1)/2) + 1;
} }
/*! HUF_writeCTable() :
`CTable` : Huffman tree to save, using huf representation.
@return : size of saved CTable */
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
{
HUF_WriteCTableWksp wksp;
return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
}
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights) size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{ {
@ -269,68 +319,64 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue) U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
{ {
const HUF_CElt* ct = CTable + 1; const HUF_CElt* const ct = CTable + 1;
assert(symbolValue <= HUF_SYMBOLVALUE_MAX); assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
return (U32)HUF_getNbBits(ct[symbolValue]); return (U32)HUF_getNbBits(ct[symbolValue]);
} }
typedef struct nodeElt_s {
U32 count;
U16 parent;
BYTE byte;
BYTE nbBits;
} nodeElt;
/* /*
* HUF_setMaxHeight(): * HUF_setMaxHeight():
* Enforces maxNbBits on the Huffman tree described in huffNode. * Try to enforce @targetNbBits on the Huffman tree described in @huffNode.
* *
* It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts * It attempts to convert all nodes with nbBits > @targetNbBits
* the tree to so that it is a valid canonical Huffman tree. * to employ @targetNbBits instead. Then it adjusts the tree
* so that it remains a valid canonical Huffman tree.
* *
* @pre The sum of the ranks of each symbol == 2^largestBits, * @pre The sum of the ranks of each symbol == 2^largestBits,
* where largestBits == huffNode[lastNonNull].nbBits. * where largestBits == huffNode[lastNonNull].nbBits.
* @post The sum of the ranks of each symbol == 2^largestBits, * @post The sum of the ranks of each symbol == 2^largestBits,
* where largestBits is the return value <= maxNbBits. * where largestBits is the return value (expected <= targetNbBits).
* *
* @param huffNode The Huffman tree modified in place to enforce maxNbBits. * @param huffNode The Huffman tree modified in place to enforce targetNbBits.
* It's presumed sorted, from most frequent to rarest symbol.
* @param lastNonNull The symbol with the lowest count in the Huffman tree. * @param lastNonNull The symbol with the lowest count in the Huffman tree.
* @param maxNbBits The maximum allowed number of bits, which the Huffman tree * @param targetNbBits The allowed number of bits, which the Huffman tree
* may not respect. After this function the Huffman tree will * may not respect. After this function the Huffman tree will
* respect maxNbBits. * respect targetNbBits.
* @return The maximum number of bits of the Huffman tree after adjustment, * @return The maximum number of bits of the Huffman tree after adjustment.
* necessarily no more than maxNbBits.
*/ */
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits)
{ {
const U32 largestBits = huffNode[lastNonNull].nbBits; const U32 largestBits = huffNode[lastNonNull].nbBits;
/* early exit : no elt > maxNbBits, so the tree is already valid. */ /* early exit : no elt > targetNbBits, so the tree is already valid. */
if (largestBits <= maxNbBits) return largestBits; if (largestBits <= targetNbBits) return largestBits;
DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits);
/* there are several too large elements (at least >= 2) */ /* there are several too large elements (at least >= 2) */
{ int totalCost = 0; { int totalCost = 0;
const U32 baseCost = 1 << (largestBits - maxNbBits); const U32 baseCost = 1 << (largestBits - targetNbBits);
int n = (int)lastNonNull; int n = (int)lastNonNull;
/* Adjust any ranks > maxNbBits to maxNbBits. /* Adjust any ranks > targetNbBits to targetNbBits.
* Compute totalCost, which is how far the sum of the ranks is * Compute totalCost, which is how far the sum of the ranks is
* we are over 2^largestBits after adjust the offending ranks. * we are over 2^largestBits after adjust the offending ranks.
*/ */
while (huffNode[n].nbBits > maxNbBits) { while (huffNode[n].nbBits > targetNbBits) {
totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
huffNode[n].nbBits = (BYTE)maxNbBits; huffNode[n].nbBits = (BYTE)targetNbBits;
n--; n--;
} }
/* n stops at huffNode[n].nbBits <= maxNbBits */ /* n stops at huffNode[n].nbBits <= targetNbBits */
assert(huffNode[n].nbBits <= maxNbBits); assert(huffNode[n].nbBits <= targetNbBits);
/* n end at index of smallest symbol using < maxNbBits */ /* n end at index of smallest symbol using < targetNbBits */
while (huffNode[n].nbBits == maxNbBits) --n; while (huffNode[n].nbBits == targetNbBits) --n;
/* renorm totalCost from 2^largestBits to 2^maxNbBits /* renorm totalCost from 2^largestBits to 2^targetNbBits
* note : totalCost is necessarily a multiple of baseCost */ * note : totalCost is necessarily a multiple of baseCost */
assert((totalCost & (baseCost - 1)) == 0); assert(((U32)totalCost & (baseCost - 1)) == 0);
totalCost >>= (largestBits - maxNbBits); totalCost >>= (largestBits - targetNbBits);
assert(totalCost > 0); assert(totalCost > 0);
/* repay normalized cost */ /* repay normalized cost */
@ -339,19 +385,19 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
/* Get pos of last (smallest = lowest cum. count) symbol per rank */ /* Get pos of last (smallest = lowest cum. count) symbol per rank */
ZSTD_memset(rankLast, 0xF0, sizeof(rankLast)); ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
{ U32 currentNbBits = maxNbBits; { U32 currentNbBits = targetNbBits;
int pos; int pos;
for (pos=n ; pos >= 0; pos--) { for (pos=n ; pos >= 0; pos--) {
if (huffNode[pos].nbBits >= currentNbBits) continue; if (huffNode[pos].nbBits >= currentNbBits) continue;
currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ currentNbBits = huffNode[pos].nbBits; /* < targetNbBits */
rankLast[maxNbBits-currentNbBits] = (U32)pos; rankLast[targetNbBits-currentNbBits] = (U32)pos;
} } } }
while (totalCost > 0) { while (totalCost > 0) {
/* Try to reduce the next power of 2 above totalCost because we /* Try to reduce the next power of 2 above totalCost because we
* gain back half the rank. * gain back half the rank.
*/ */
U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1; U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1;
for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
U32 const highPos = rankLast[nBitsToDecrease]; U32 const highPos = rankLast[nBitsToDecrease];
U32 const lowPos = rankLast[nBitsToDecrease-1]; U32 const lowPos = rankLast[nBitsToDecrease-1];
@ -391,7 +437,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
rankLast[nBitsToDecrease] = noSymbol; rankLast[nBitsToDecrease] = noSymbol;
else { else {
rankLast[nBitsToDecrease]--; rankLast[nBitsToDecrease]--;
if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease)
rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
} }
} /* while (totalCost > 0) */ } /* while (totalCost > 0) */
@ -403,11 +449,11 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
* TODO. * TODO.
*/ */
while (totalCost < 0) { /* Sometimes, cost correction overshoot */ while (totalCost < 0) { /* Sometimes, cost correction overshoot */
/* special case : no rank 1 symbol (using maxNbBits-1); /* special case : no rank 1 symbol (using targetNbBits-1);
* let's create one from largest rank 0 (using maxNbBits). * let's create one from largest rank 0 (using targetNbBits).
*/ */
if (rankLast[1] == noSymbol) { if (rankLast[1] == noSymbol) {
while (huffNode[n].nbBits == maxNbBits) n--; while (huffNode[n].nbBits == targetNbBits) n--;
huffNode[n+1].nbBits--; huffNode[n+1].nbBits--;
assert(n >= 0); assert(n >= 0);
rankLast[1] = (U32)(n+1); rankLast[1] = (U32)(n+1);
@ -421,7 +467,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
} /* repay normalized cost */ } /* repay normalized cost */
} /* there are several too large elements (at least >= 2) */ } /* there are several too large elements (at least >= 2) */
return maxNbBits; return targetNbBits;
} }
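Editor's note, an illustrative check of the pre/post condition documented above (not part of the patch): with symbol depths {1, 2, 3, 3} and largestBits == 3, the ranks sum to 2^(3-1) + 2^(3-2) + 2^(3-3) + 2^(3-3) = 4 + 2 + 1 + 1 = 8 == 2^largestBits, so the tree is a complete canonical Huffman tree; HUF_setMaxHeight() re-establishes exactly this sum after capping the deepest symbols at targetNbBits.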
typedef struct { typedef struct {
@ -429,7 +475,7 @@ typedef struct {
U16 curr; U16 curr;
} rankPos; } rankPos;
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)];
/* Number of buckets available for HUF_sort() */ /* Number of buckets available for HUF_sort() */
#define RANK_POSITION_TABLE_SIZE 192 #define RANK_POSITION_TABLE_SIZE 192
@ -448,8 +494,8 @@ typedef struct {
* Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing. * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
*/ */
#define RANK_POSITION_MAX_COUNT_LOG 32 #define RANK_POSITION_MAX_COUNT_LOG 32
#define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */ #define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */ #define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)
/* Return the appropriate bucket index for a given count. See definition of /* Return the appropriate bucket index for a given count. See definition of
* RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy. * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
@ -457,7 +503,7 @@ typedef struct {
static U32 HUF_getIndex(U32 const count) { static U32 HUF_getIndex(U32 const count) {
return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF) return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
? count ? count
: BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN; : ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
} }
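Editor's note, a quick illustration of the bucketing above (not part of the patch): a symbol seen 42 times keeps its own distinct bucket 42, while a symbol seen 1000 times falls in the log2 region and maps to bucket ZSTD_highbit32(1000) + RANK_POSITION_LOG_BUCKETS_BEGIN == 9 + 158 == 167.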
/* Helper swap function for HUF_quickSortPartition() */ /* Helper swap function for HUF_quickSortPartition() */
@ -580,7 +626,7 @@ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSy
/* Sort each bucket. */ /* Sort each bucket. */
for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) { for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base; int const bucketSize = rankPosition[n].curr - rankPosition[n].base;
U32 const bucketStartIdx = rankPosition[n].base; U32 const bucketStartIdx = rankPosition[n].base;
if (bucketSize > 1) { if (bucketSize > 1) {
assert(bucketStartIdx < maxSymbolValue1); assert(bucketStartIdx < maxSymbolValue1);
@ -591,6 +637,7 @@ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSy
assert(HUF_isSorted(huffNode, maxSymbolValue1)); assert(HUF_isSorted(huffNode, maxSymbolValue1));
} }
/* HUF_buildCTable_wksp() : /* HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer. * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
* `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables). * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
@ -611,6 +658,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
int lowS, lowN; int lowS, lowN;
int nodeNb = STARTNODE; int nodeNb = STARTNODE;
int n, nodeRoot; int n, nodeRoot;
DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1);
/* init for parents */ /* init for parents */
nonNullRank = (int)maxSymbolValue; nonNullRank = (int)maxSymbolValue;
while(huffNode[nonNullRank].count == 0) nonNullRank--; while(huffNode[nonNullRank].count == 0) nonNullRank--;
@ -637,6 +685,8 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
for (n=0; n<=nonNullRank; n++) for (n=0; n<=nonNullRank; n++)
huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1));
return nonNullRank; return nonNullRank;
} }
@ -674,28 +724,36 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i
CTable[0] = maxNbBits; CTable[0] = maxNbBits;
} }
size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) size_t
HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
void* workSpace, size_t wkspSize)
{ {
HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32)); HUF_buildCTable_wksp_tables* const wksp_tables =
(HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
nodeElt* const huffNode0 = wksp_tables->huffNodeTbl; nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
nodeElt* const huffNode = huffNode0+1; nodeElt* const huffNode = huffNode0+1;
int nonNullRank; int nonNullRank;
HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables));
DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1);
/* safety checks */ /* safety checks */
if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
return ERROR(workSpace_tooSmall); return ERROR(workSpace_tooSmall);
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
return ERROR(maxSymbolValue_tooLarge); return ERROR(maxSymbolValue_tooLarge);
ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable)); ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
/* sort, decreasing order */ /* sort, decreasing order */
HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition); HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1));
/* build tree */ /* build tree */
nonNullRank = HUF_buildTree(huffNode, maxSymbolValue); nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
/* enforce maxTableLog */ /* determine and enforce maxTableLog */
maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits); maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
@ -804,7 +862,7 @@ FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int id
#if DEBUGLEVEL >= 1 #if DEBUGLEVEL >= 1
{ {
size_t const nbBits = HUF_getNbBits(elt); size_t const nbBits = HUF_getNbBits(elt);
size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1; size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1;
(void)dirtyBits; (void)dirtyBits;
/* Middle bits are 0. */ /* Middle bits are 0. */
assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0); assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
@ -884,7 +942,7 @@ static size_t HUF_closeCStream(HUF_CStream_t* bitC)
{ {
size_t const nbBits = bitC->bitPos[0] & 0xFF; size_t const nbBits = bitC->bitPos[0] & 0xFF;
if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
return (bitC->ptr - bitC->startPtr) + (nbBits > 0); return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);
} }
} }
@ -1045,9 +1103,9 @@ HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
static size_t static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
const void* src, size_t srcSize, const void* src, size_t srcSize,
const HUF_CElt* CTable, const int bmi2) const HUF_CElt* CTable, const int flags)
{ {
if (bmi2) { if (flags & HUF_flags_bmi2) {
return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
} }
return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
@ -1058,28 +1116,23 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
static size_t static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
const void* src, size_t srcSize, const void* src, size_t srcSize,
const HUF_CElt* CTable, const int bmi2) const HUF_CElt* CTable, const int flags)
{ {
(void)bmi2; (void)flags;
return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
} }
#endif #endif
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
{ {
return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
}
size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
} }
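
For context (an illustrative sketch, not code from this patch): the old int bmi2 parameter of the *_usingCTable entry points is folded into a single flags bitfield, which callers elsewhere in this series assemble from the HUF_flags_* toggles seen below (HUF_flags_bmi2, HUF_flags_preferRepeat, HUF_flags_optimalDepth, HUF_flags_suspectUncompressible). One way such a caller might compose the word, assuming those enumerators are declared in huf.h:

    #include "huf.h"   /* assumed to declare the HUF_flags_* enumerators introduced by this series */

    /* Illustrative sketch only: build a HUF flags word from independent toggles. */
    static int make_huf_flags(int bmi2, int preferRepeat, int optimalDepth, int suspectUncompressible)
    {
        int flags = 0;
        if (bmi2)                  flags |= HUF_flags_bmi2;
        if (preferRepeat)          flags |= HUF_flags_preferRepeat;
        if (optimalDepth)          flags |= HUF_flags_optimalDepth;
        if (suspectUncompressible) flags |= HUF_flags_suspectUncompressible;
        return flags;
    }
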
static size_t static size_t
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
const void* src, size_t srcSize, const void* src, size_t srcSize,
const HUF_CElt* CTable, int bmi2) const HUF_CElt* CTable, int flags)
{ {
size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
const BYTE* ip = (const BYTE*) src; const BYTE* ip = (const BYTE*) src;
@ -1093,7 +1146,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
op += 6; /* jumpTable */ op += 6; /* jumpTable */
assert(op <= oend); assert(op <= oend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
if (cSize == 0 || cSize > 65535) return 0; if (cSize == 0 || cSize > 65535) return 0;
MEM_writeLE16(ostart, (U16)cSize); MEM_writeLE16(ostart, (U16)cSize);
op += cSize; op += cSize;
@ -1101,7 +1154,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
ip += segmentSize; ip += segmentSize;
assert(op <= oend); assert(op <= oend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
if (cSize == 0 || cSize > 65535) return 0; if (cSize == 0 || cSize > 65535) return 0;
MEM_writeLE16(ostart+2, (U16)cSize); MEM_writeLE16(ostart+2, (U16)cSize);
op += cSize; op += cSize;
@ -1109,7 +1162,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
ip += segmentSize; ip += segmentSize;
assert(op <= oend); assert(op <= oend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
if (cSize == 0 || cSize > 65535) return 0; if (cSize == 0 || cSize > 65535) return 0;
MEM_writeLE16(ostart+4, (U16)cSize); MEM_writeLE16(ostart+4, (U16)cSize);
op += cSize; op += cSize;
@ -1118,7 +1171,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
ip += segmentSize; ip += segmentSize;
assert(op <= oend); assert(op <= oend);
assert(ip <= iend); assert(ip <= iend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) );
if (cSize == 0 || cSize > 65535) return 0; if (cSize == 0 || cSize > 65535) return 0;
op += cSize; op += cSize;
} }
@ -1126,14 +1179,9 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
return (size_t)(op-ostart); return (size_t)(op-ostart);
} }
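
For context (an illustrative sketch, not code from this patch): the function above writes a 6-byte jump table holding the little-endian sizes of the first three streams, then the four compressed streams back to back; the fourth stream's size is implied by the total. A standalone sketch of recovering the four segments from that layout:

    #include <stddef.h>
    #include <stdint.h>

    static uint16_t read_le16(const uint8_t* p) { return (uint16_t)(p[0] | (p[1] << 8)); }

    /* Illustrative sketch: split a 4-stream Huffman payload into its segments.
     * Returns 0 on success, -1 if the jump table is inconsistent with srcSize. */
    static int split_4streams(const uint8_t* src, size_t srcSize,
                              const uint8_t* start[4], size_t size[4])
    {
        size_t s1, s2, s3;
        if (srcSize < 6) return -1;
        s1 = read_le16(src + 0);
        s2 = read_le16(src + 2);
        s3 = read_le16(src + 4);
        if (6 + s1 + s2 + s3 > srcSize) return -1;
        start[0] = src + 6;        size[0] = s1;
        start[1] = start[0] + s1;  size[1] = s2;
        start[2] = start[1] + s2;  size[2] = s3;
        start[3] = start[2] + s3;  size[3] = srcSize - 6 - s1 - s2 - s3;
        return 0;
    }
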
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
{ {
return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
}
size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
} }
typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
@ -1141,11 +1189,11 @@ typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
static size_t HUF_compressCTable_internal( static size_t HUF_compressCTable_internal(
BYTE* const ostart, BYTE* op, BYTE* const oend, BYTE* const ostart, BYTE* op, BYTE* const oend,
const void* src, size_t srcSize, const void* src, size_t srcSize,
HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags)
{ {
size_t const cSize = (nbStreams==HUF_singleStream) ? size_t const cSize = (nbStreams==HUF_singleStream) ?
HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) : HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) :
HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2); HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags);
if (HUF_isError(cSize)) { return cSize; } if (HUF_isError(cSize)) { return cSize; }
if (cSize==0) { return 0; } /* uncompressible */ if (cSize==0) { return 0; } /* uncompressible */
op += cSize; op += cSize;
@ -1168,6 +1216,79 @@ typedef struct {
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096 #define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */ #define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */
unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue)
{
unsigned cardinality = 0;
unsigned i;
for (i = 0; i < maxSymbolValue + 1; i++) {
if (count[i] != 0) cardinality += 1;
}
return cardinality;
}
unsigned HUF_minTableLog(unsigned symbolCardinality)
{
U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1;
return minBitsSymbols;
}
unsigned HUF_optimalTableLog(
unsigned maxTableLog,
size_t srcSize,
unsigned maxSymbolValue,
void* workSpace, size_t wkspSize,
HUF_CElt* table,
const unsigned* count,
int flags)
{
assert(srcSize > 1); /* Not supported, RLE should be used instead */
assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables));
if (!(flags & HUF_flags_optimalDepth)) {
/* cheap evaluation, based on FSE */
return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
}
{ BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
size_t maxBits, hSize, newSize;
const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
size_t optSize = ((size_t) ~0) - 1;
unsigned optLog = maxTableLog, optLogGuess;
DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);
/* Search until size increases */
for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
if (ERR_isError(maxBits)) continue;
if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
if (ERR_isError(hSize)) continue;
newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;
if (newSize > optSize + 1) {
break;
}
if (newSize < optSize) {
optSize = newSize;
optLog = optLogGuess;
}
}
assert(optLog <= HUF_TABLELOG_MAX);
return optLog;
}
}
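
When HUF_flags_optimalDepth is set, the loop above probes table depths starting from the minimum implied by the live-symbol count (for example, 40 live symbols give ZSTD_highbit32(40) == 5, so probing starts at depth 6), charges each candidate the serialized table header plus the estimated payload, and stops once the total starts rising again. A toy standalone sketch of that probe-until-cost-increases idea (illustrative only; the cost callbacks stand in for HUF_writeCTable_wksp() and HUF_estimateCompressedSize()):

    #include <stddef.h>

    typedef size_t (*cost_fn)(unsigned tableLog, void* ctx);

    /* Illustrative sketch of the probe loop used by HUF_optimalTableLog(). */
    static unsigned probe_optimal_log(unsigned minLog, unsigned maxLog,
                                      cost_fn header_cost, cost_fn payload_cost, void* ctx)
    {
        unsigned log, bestLog = maxLog;
        size_t bestCost = ((size_t)-1) - 1;
        for (log = minLog; log <= maxLog; log++) {
            size_t const cost = header_cost(log, ctx) + payload_cost(log, ctx);
            if (cost > bestCost + 1) break;   /* cost is rising again: stop searching */
            if (cost < bestCost) { bestCost = cost; bestLog = log; }
        }
        return bestLog;
    }
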
/* HUF_compress_internal() : /* HUF_compress_internal() :
* `workSpace_align4` must be aligned on 4-bytes boundaries, * `workSpace_align4` must be aligned on 4-bytes boundaries,
* and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */ * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
@ -1177,14 +1298,14 @@ HUF_compress_internal (void* dst, size_t dstSize,
unsigned maxSymbolValue, unsigned huffLog, unsigned maxSymbolValue, unsigned huffLog,
HUF_nbStreams_e nbStreams, HUF_nbStreams_e nbStreams,
void* workSpace, size_t wkspSize, void* workSpace, size_t wkspSize,
HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags)
const int bmi2, unsigned suspectUncompressible)
{ {
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t)); HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
BYTE* const ostart = (BYTE*)dst; BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize; BYTE* const oend = ostart + dstSize;
BYTE* op = ostart; BYTE* op = ostart;
DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize);
HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE); HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
/* checks & inits */ /* checks & inits */
@ -1198,16 +1319,17 @@ HUF_compress_internal (void* dst, size_t dstSize,
if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
/* Heuristic : If old table is valid, use it for small inputs */ /* Heuristic : If old table is valid, use it for small inputs */
if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) {
return HUF_compressCTable_internal(ostart, op, oend, return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize, src, srcSize,
nbStreams, oldHufTable, bmi2); nbStreams, oldHufTable, flags);
} }
/* If uncompressible data is suspected, do a smaller sampling first */ /* If uncompressible data is suspected, do a smaller sampling first */
DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2); DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) { if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
size_t largestTotal = 0; size_t largestTotal = 0;
DEBUGLOG(5, "input suspected incompressible : sampling to check");
{ unsigned maxSymbolValueBegin = maxSymbolValue; { unsigned maxSymbolValueBegin = maxSymbolValue;
CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) ); CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
largestTotal += largestBegin; largestTotal += largestBegin;
@ -1224,6 +1346,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
} }
DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1));
/* Check validity of previous table */ /* Check validity of previous table */
if ( repeat if ( repeat
@ -1232,19 +1355,20 @@ HUF_compress_internal (void* dst, size_t dstSize,
*repeat = HUF_repeat_none; *repeat = HUF_repeat_none;
} }
/* Heuristic : use existing table for small inputs */ /* Heuristic : use existing table for small inputs */
if (preferRepeat && repeat && *repeat != HUF_repeat_none) { if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) {
return HUF_compressCTable_internal(ostart, op, oend, return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize, src, srcSize,
nbStreams, oldHufTable, bmi2); nbStreams, oldHufTable, flags);
} }
/* Build Huffman Tree */ /* Build Huffman Tree */
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags);
{ size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
maxSymbolValue, huffLog, maxSymbolValue, huffLog,
&table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp)); &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
CHECK_F(maxBits); CHECK_F(maxBits);
huffLog = (U32)maxBits; huffLog = (U32)maxBits;
DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
} }
/* Zero unused symbols in CTable, so we can check it for validity */ /* Zero unused symbols in CTable, so we can check it for validity */
{ {
@ -1263,7 +1387,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
return HUF_compressCTable_internal(ostart, op, oend, return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize, src, srcSize,
nbStreams, oldHufTable, bmi2); nbStreams, oldHufTable, flags);
} } } }
/* Use the new huffman table */ /* Use the new huffman table */
@ -1275,46 +1399,20 @@ HUF_compress_internal (void* dst, size_t dstSize,
} }
return HUF_compressCTable_internal(ostart, op, oend, return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize, src, srcSize,
nbStreams, table->CTable, bmi2); nbStreams, table->CTable, flags);
}
size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize)
{
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_singleStream,
workSpace, wkspSize,
NULL, NULL, 0, 0 /*bmi2*/, 0);
} }
size_t HUF_compress1X_repeat (void* dst, size_t dstSize, size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize, const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog, unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize, void* workSpace, size_t wkspSize,
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
int bmi2, unsigned suspectUncompressible)
{ {
DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize);
return HUF_compress_internal(dst, dstSize, src, srcSize, return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_singleStream, maxSymbolValue, huffLog, HUF_singleStream,
workSpace, wkspSize, hufTable, workSpace, wkspSize, hufTable,
repeat, preferRepeat, bmi2, suspectUncompressible); repeat, flags);
}
/* HUF_compress4X_repeat():
* compress input using 4 streams.
* provide workspace to generate compression tables */
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize)
{
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_fourStreams,
workSpace, wkspSize,
NULL, NULL, 0, 0 /*bmi2*/, 0);
} }
/* HUF_compress4X_repeat(): /* HUF_compress4X_repeat():
@ -1325,11 +1423,11 @@ size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize, const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog, unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize, void* workSpace, size_t wkspSize,
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible) HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
{ {
DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize);
return HUF_compress_internal(dst, dstSize, src, srcSize, return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_fourStreams, maxSymbolValue, huffLog, HUF_fourStreams,
workSpace, wkspSize, workSpace, wkspSize,
hufTable, repeat, preferRepeat, bmi2, suspectUncompressible); hufTable, repeat, flags);
} }

File diff suppressed because it is too large

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -20,6 +21,7 @@
***************************************/ ***************************************/
#include "../common/zstd_internal.h" #include "../common/zstd_internal.h"
#include "zstd_cwksp.h" #include "zstd_cwksp.h"
#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */
/*-************************************* /*-*************************************
@ -111,12 +113,13 @@ typedef struct {
/* ZSTD_buildBlockEntropyStats() : /* ZSTD_buildBlockEntropyStats() :
* Builds entropy for the block. * Builds entropy for the block.
* @return : 0 on success or error code */ * @return : 0 on success or error code */
size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, size_t ZSTD_buildBlockEntropyStats(
const ZSTD_entropyCTables_t* prevEntropy, const seqStore_t* seqStorePtr,
ZSTD_entropyCTables_t* nextEntropy, const ZSTD_entropyCTables_t* prevEntropy,
const ZSTD_CCtx_params* cctxParams, ZSTD_entropyCTables_t* nextEntropy,
ZSTD_entropyCTablesMetadata_t* entropyMetadata, const ZSTD_CCtx_params* cctxParams,
void* workspace, size_t wkspSize); ZSTD_entropyCTablesMetadata_t* entropyMetadata,
void* workspace, size_t wkspSize);
/* ******************************* /* *******************************
* Compression internals structs * * Compression internals structs *
@ -142,6 +145,12 @@ typedef struct {
size_t capacity; /* The capacity starting from `seq` pointer */ size_t capacity; /* The capacity starting from `seq` pointer */
} rawSeqStore_t; } rawSeqStore_t;
typedef struct {
U32 idx; /* Index in array of ZSTD_Sequence */
U32 posInSequence; /* Position within sequence at idx */
size_t posInSrc; /* Number of bytes given by sequences provided so far */
} ZSTD_sequencePosition;
UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
typedef struct { typedef struct {
@ -212,8 +221,10 @@ struct ZSTD_matchState_t {
U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/ U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */ BYTE* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */ U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
U64 hashSalt; /* For row-based matchFinder: salts the hash for re-use of tag table */
U32 hashSaltEntropy; /* For row-based matchFinder: collects entropy for salt generation */
U32* hashTable; U32* hashTable;
U32* hashTable3; U32* hashTable3;
@ -228,6 +239,18 @@ struct ZSTD_matchState_t {
const ZSTD_matchState_t* dictMatchState; const ZSTD_matchState_t* dictMatchState;
ZSTD_compressionParameters cParams; ZSTD_compressionParameters cParams;
const rawSeqStore_t* ldmSeqStore; const rawSeqStore_t* ldmSeqStore;
/* Controls prefetching in some dictMatchState matchfinders.
* This behavior is controlled from the cctx ms.
* This parameter has no effect in the cdict ms. */
int prefetchCDictTables;
/* When == 0, lazy match finders insert every position.
* When != 0, lazy match finders only insert positions they search.
* This allows them to skip much faster over incompressible data,
* at a small cost to compression ratio.
*/
int lazySkipping;
}; };
typedef struct { typedef struct {
@ -324,6 +347,24 @@ struct ZSTD_CCtx_params_s {
/* Internal use, for createCCtxParams() and freeCCtxParams() only */ /* Internal use, for createCCtxParams() and freeCCtxParams() only */
ZSTD_customMem customMem; ZSTD_customMem customMem;
/* Controls prefetching in some dictMatchState matchfinders */
ZSTD_paramSwitch_e prefetchCDictTables;
/* Controls whether zstd will fall back to an internal matchfinder
* if the external matchfinder returns an error code. */
int enableMatchFinderFallback;
/* Indicates whether an external matchfinder has been referenced.
* Users can't set this externally.
* It is set internally in ZSTD_registerSequenceProducer(). */
int useSequenceProducer;
/* Adjust the max block size*/
size_t maxBlockSize;
/* Controls repcode search in external sequence parsing */
ZSTD_paramSwitch_e searchForExternalRepcodes;
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2)) #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
@ -355,6 +396,14 @@ typedef struct {
ZSTD_entropyCTablesMetadata_t entropyMetadata; ZSTD_entropyCTablesMetadata_t entropyMetadata;
} ZSTD_blockSplitCtx; } ZSTD_blockSplitCtx;
/* Context for block-level external matchfinder API */
typedef struct {
void* mState;
ZSTD_sequenceProducer_F* mFinder;
ZSTD_Sequence* seqBuffer;
size_t seqBufferCapacity;
} ZSTD_externalMatchCtx;
struct ZSTD_CCtx_s { struct ZSTD_CCtx_s {
ZSTD_compressionStage_e stage; ZSTD_compressionStage_e stage;
int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */ int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
@ -404,6 +453,7 @@ struct ZSTD_CCtx_s {
/* Stable in/out buffer verification */ /* Stable in/out buffer verification */
ZSTD_inBuffer expectedInBuffer; ZSTD_inBuffer expectedInBuffer;
size_t stableIn_notConsumed; /* nb bytes within stable input buffer that are said to be consumed but are not */
size_t expectedOutBufferSize; size_t expectedOutBufferSize;
/* Dictionary */ /* Dictionary */
@ -417,9 +467,13 @@ struct ZSTD_CCtx_s {
/* Workspace for block splitter */ /* Workspace for block splitter */
ZSTD_blockSplitCtx blockSplitCtx; ZSTD_blockSplitCtx blockSplitCtx;
/* Workspace for external matchfinder */
ZSTD_externalMatchCtx externalMatchCtx;
}; };
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
typedef enum { ZSTD_tfp_forCCtx, ZSTD_tfp_forCDict } ZSTD_tableFillPurpose_e;
typedef enum { typedef enum {
ZSTD_noDict = 0, ZSTD_noDict = 0,
@ -441,7 +495,7 @@ typedef enum {
* In this mode we take both the source size and the dictionary size * In this mode we take both the source size and the dictionary size
* into account when selecting and adjusting the parameters. * into account when selecting and adjusting the parameters.
*/ */
ZSTD_cpm_unknown = 3, /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams. ZSTD_cpm_unknown = 3 /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
* We don't know what these parameters are for. We default to the legacy * We don't know what these parameters are for. We default to the legacy
* behavior of taking both the source size and the dict size into account * behavior of taking both the source size and the dict size into account
* when selecting and adjusting parameters. * when selecting and adjusting parameters.
@ -500,9 +554,11 @@ MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
/* ZSTD_noCompressBlock() : /* ZSTD_noCompressBlock() :
* Writes uncompressed block to dst buffer from given src. * Writes uncompressed block to dst buffer from given src.
* Returns the size of the block */ * Returns the size of the block */
MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) MEM_STATIC size_t
ZSTD_noCompressBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
{ {
U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
DEBUGLOG(5, "ZSTD_noCompressBlock (srcSize=%zu, dstCapacity=%zu)", srcSize, dstCapacity);
RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
dstSize_tooSmall, "dst buf too small for uncompressed block"); dstSize_tooSmall, "dst buf too small for uncompressed block");
MEM_writeLE24(dst, cBlockHeader24); MEM_writeLE24(dst, cBlockHeader24);
@ -510,7 +566,8 @@ MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const voi
return ZSTD_blockHeaderSize + srcSize; return ZSTD_blockHeaderSize + srcSize;
} }
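
As a quick illustrative cross-check of the header math above (not code from this patch): the 3-byte block header stores lastBlock in bit 0, the block type in bits 1-2 (bt_raw is 0), and the block size in the remaining 21 bits.

    #include <assert.h>
    #include <stdint.h>

    /* Worked example for the cBlockHeader24 formula above (bt_raw == 0):
     * srcSize = 100, lastBlock = 1  ->  1 + (0 << 1) + (100 << 3) = 0x321. */
    static uint32_t raw_block_header24(uint32_t srcSize, uint32_t lastBlock)
    {
        return lastBlock + (0u /* bt_raw */ << 1) + (srcSize << 3);
    }

    static void raw_block_header_example(void)
    {
        assert(raw_block_header24(100, 1) == 0x321);
    }
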
MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) MEM_STATIC size_t
ZSTD_rleCompressBlock(void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
{ {
BYTE* const op = (BYTE*)dst; BYTE* const op = (BYTE*)dst;
U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3); U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
@ -529,7 +586,7 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
{ {
U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat));
return (srcSize >> minlog) + 2; return (srcSize >> minlog) + 2;
} }
@ -565,29 +622,27 @@ ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE con
while (ip < iend) *op++ = *ip++; while (ip < iend) *op++ = *ip++;
} }
#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
#define STORE_REPCODE_1 STORE_REPCODE(1) #define REPCODE1_TO_OFFBASE REPCODE_TO_OFFBASE(1)
#define STORE_REPCODE_2 STORE_REPCODE(2) #define REPCODE2_TO_OFFBASE REPCODE_TO_OFFBASE(2)
#define STORE_REPCODE_3 STORE_REPCODE(3) #define REPCODE3_TO_OFFBASE REPCODE_TO_OFFBASE(3)
#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1) #define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r)) /* accepts IDs 1,2,3 */
#define STORE_OFFSET(o) (assert((o)>0), o + ZSTD_REP_MOVE) #define OFFSET_TO_OFFBASE(o) (assert((o)>0), o + ZSTD_REP_NUM)
#define STORED_IS_OFFSET(o) ((o) > ZSTD_REP_MOVE) #define OFFBASE_IS_OFFSET(o) ((o) > ZSTD_REP_NUM)
#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE) #define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM)
#define STORED_OFFSET(o) (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE) #define OFFBASE_TO_OFFSET(o) (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM)
#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1) /* returns ID 1,2,3 */ #define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o)) /* returns ID 1,2,3 */
#define STORED_TO_OFFBASE(o) ((o)+1)
#define OFFBASE_TO_STORED(o) ((o)-1)
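
The renamed macros also change the numeric convention: repcode IDs 1..3 are now stored as offBase values 1..3 directly, real offsets are stored as offset + ZSTD_REP_NUM, and the former +1/-1 STORED_TO_OFFBASE / OFFBASE_TO_STORED shims disappear. A small standalone round-trip sketch of that encoding (illustrative only; ZSTD_REP_NUM is 3):

    #include <assert.h>
    #include <stdint.h>

    #define REP_NUM 3  /* mirrors ZSTD_REP_NUM */

    /* Illustrative sketch of the offBase sum-type: values 1..REP_NUM are repcode IDs,
     * values above REP_NUM carry a real offset shifted up by REP_NUM. */
    static uint32_t offset_to_offbase(uint32_t offset) { assert(offset > 0); return offset + REP_NUM; }
    static uint32_t repcode_to_offbase(uint32_t rep)   { assert(rep >= 1 && rep <= REP_NUM); return rep; }
    static int      offbase_is_repcode(uint32_t ob)    { return 1 <= ob && ob <= REP_NUM; }
    static uint32_t offbase_to_offset(uint32_t ob)     { assert(ob > REP_NUM); return ob - REP_NUM; }

    static void offbase_roundtrip(void)
    {
        assert(offbase_to_offset(offset_to_offbase(1234)) == 1234);
        assert(offbase_is_repcode(repcode_to_offbase(2)));
    }
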
/*! ZSTD_storeSeq() : /*! ZSTD_storeSeq() :
* Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t. * Store a sequence (litlen, litPtr, offBase and matchLength) into seqStore_t.
* @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET(). * @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE().
* @matchLength : must be >= MINMATCH * @matchLength : must be >= MINMATCH
* Allowed to overread literals up to litLimit. * Allowed to over-read literals up to litLimit.
*/ */
HINT_INLINE UNUSED_ATTR void HINT_INLINE UNUSED_ATTR void
ZSTD_storeSeq(seqStore_t* seqStorePtr, ZSTD_storeSeq(seqStore_t* seqStorePtr,
size_t litLength, const BYTE* literals, const BYTE* litLimit, size_t litLength, const BYTE* literals, const BYTE* litLimit,
U32 offBase_minus1, U32 offBase,
size_t matchLength) size_t matchLength)
{ {
BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
@ -596,8 +651,8 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
static const BYTE* g_start = NULL; static const BYTE* g_start = NULL;
if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
{ U32 const pos = (U32)((const BYTE*)literals - g_start); { U32 const pos = (U32)((const BYTE*)literals - g_start);
DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offBase%7u",
pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1); pos, (U32)litLength, (U32)matchLength, (U32)offBase);
} }
#endif #endif
assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
@ -607,9 +662,9 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
assert(literals + litLength <= litLimit); assert(literals + litLength <= litLimit);
if (litEnd <= litLimit_w) { if (litEnd <= litLimit_w) {
/* Common case we can use wildcopy. /* Common case we can use wildcopy.
* First copy 16 bytes, because literals are likely short. * First copy 16 bytes, because literals are likely short.
*/ */
assert(WILDCOPY_OVERLENGTH >= 16); ZSTD_STATIC_ASSERT(WILDCOPY_OVERLENGTH >= 16);
ZSTD_copy16(seqStorePtr->lit, literals); ZSTD_copy16(seqStorePtr->lit, literals);
if (litLength > 16) { if (litLength > 16) {
ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
@ -628,7 +683,7 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
seqStorePtr->sequences[0].litLength = (U16)litLength; seqStorePtr->sequences[0].litLength = (U16)litLength;
/* match offset */ /* match offset */
seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1); seqStorePtr->sequences[0].offBase = offBase;
/* match Length */ /* match Length */
assert(matchLength >= MINMATCH); assert(matchLength >= MINMATCH);
@ -646,17 +701,17 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
/* ZSTD_updateRep() : /* ZSTD_updateRep() :
* updates in-place @rep (array of repeat offsets) * updates in-place @rep (array of repeat offsets)
* @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq() * @offBase : sum-type, using numeric representation of ZSTD_storeSeq()
*/ */
MEM_STATIC void MEM_STATIC void
ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
{ {
if (STORED_IS_OFFSET(offBase_minus1)) { /* full offset */ if (OFFBASE_IS_OFFSET(offBase)) { /* full offset */
rep[2] = rep[1]; rep[2] = rep[1];
rep[1] = rep[0]; rep[1] = rep[0];
rep[0] = STORED_OFFSET(offBase_minus1); rep[0] = OFFBASE_TO_OFFSET(offBase);
} else { /* repcode */ } else { /* repcode */
U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0; U32 const repCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0;
if (repCode > 0) { /* note : if repCode==0, no change */ if (repCode > 0) { /* note : if repCode==0, no change */
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
rep[2] = (repCode >= 2) ? rep[1] : rep[2]; rep[2] = (repCode >= 2) ? rep[1] : rep[2];
@ -673,11 +728,11 @@ typedef struct repcodes_s {
} repcodes_t; } repcodes_t;
MEM_STATIC repcodes_t MEM_STATIC repcodes_t
ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
{ {
repcodes_t newReps; repcodes_t newReps;
ZSTD_memcpy(&newReps, rep, sizeof(newReps)); ZSTD_memcpy(&newReps, rep, sizeof(newReps));
ZSTD_updateRep(newReps.rep, offBase_minus1, ll0); ZSTD_updateRep(newReps.rep, offBase, ll0);
return newReps; return newReps;
} }
@ -685,59 +740,6 @@ ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0
/*-************************************* /*-*************************************
* Match length counter * Match length counter
***************************************/ ***************************************/
static unsigned ZSTD_NbCommonBytes (size_t val)
{
if (MEM_isLittleEndian()) {
if (MEM_64bits()) {
# if (__GNUC__ >= 4)
return (__builtin_ctzll((U64)val) >> 3);
# else
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
0, 3, 1, 3, 1, 4, 2, 7,
0, 2, 3, 6, 1, 5, 3, 5,
1, 3, 4, 4, 2, 5, 6, 7,
7, 0, 1, 2, 3, 3, 4, 6,
2, 6, 5, 5, 3, 4, 5, 6,
7, 1, 2, 4, 6, 4, 4, 5,
7, 2, 6, 5, 7, 6, 7, 7 };
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
# endif
} else { /* 32 bits */
# if (__GNUC__ >= 3)
return (__builtin_ctz((U32)val) >> 3);
# else
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
3, 2, 2, 1, 3, 2, 0, 1,
3, 3, 1, 2, 2, 2, 2, 0,
3, 1, 2, 0, 1, 0, 1, 1 };
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
# endif
}
} else { /* Big Endian CPU */
if (MEM_64bits()) {
# if (__GNUC__ >= 4)
return (__builtin_clzll(val) >> 3);
# else
unsigned r;
const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
r += (!val);
return r;
# endif
} else { /* 32 bits */
# if (__GNUC__ >= 3)
return (__builtin_clz((U32)val) >> 3);
# else
unsigned r;
if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
r += (!val);
return r;
# endif
} }
}
MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
{ {
const BYTE* const pStart = pIn; const BYTE* const pStart = pIn;
@ -783,32 +785,43 @@ ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
* Hashes * Hashes
***************************************/ ***************************************/
static const U32 prime3bytes = 506832829U; static const U32 prime3bytes = 506832829U;
static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; } static U32 ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s) >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */
MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); }
static const U32 prime4bytes = 2654435761U; static const U32 prime4bytes = 2654435761U;
static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } static U32 ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); }
static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); }
static const U64 prime5bytes = 889523592379ULL; static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); }
static const U64 prime6bytes = 227718039650203ULL; static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); }
static const U64 prime7bytes = 58295818150454627ULL; static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); }
static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); }
MEM_STATIC FORCE_INLINE_ATTR MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
{ {
/* Although some of these hashes do support hBits up to 64, some do not.
* To be on the safe side, always avoid hBits > 32. */
assert(hBits <= 32);
switch(mls) switch(mls)
{ {
default: default:
@ -820,6 +833,24 @@ size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
} }
} }
MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt) {
/* Although some of these hashes do support hBits up to 64, some do not.
* To be on the safe side, always avoid hBits > 32. */
assert(hBits <= 32);
switch(mls)
{
default:
case 4: return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt);
case 5: return ZSTD_hash5PtrS(p, hBits, hashSalt);
case 6: return ZSTD_hash6PtrS(p, hBits, hashSalt);
case 7: return ZSTD_hash7PtrS(p, hBits, hashSalt);
case 8: return ZSTD_hash8PtrS(p, hBits, hashSalt);
}
}
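
The new *PtrS variants thread a salt through the multiplicative hash so the row-based matchfinder can re-salt and reuse its tag table instead of clearing it. A standalone sketch of the 5-byte variant's shape, mirroring the ZSTD_hash5 formula above (illustrative only, shown outside the zstd headers):

    #include <stdint.h>
    #include <stddef.h>

    static const uint64_t kPrime5bytes = 889523592379ULL;

    /* Illustrative sketch: salted multiplicative hash over the top 40 bits of u,
     * reduced to hBits bits. A different salt yields a different mapping for the
     * same input, which is what allows a tag table to be re-salted rather than wiped. */
    static size_t hash5_salted(uint64_t u, unsigned hBits, uint64_t salt)
    {
        return (size_t)((((u << (64 - 40)) * kPrime5bytes) ^ salt) >> (64 - hBits));
    }
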
/* ZSTD_ipow() : /* ZSTD_ipow() :
* Return base^exponent. * Return base^exponent.
*/ */
@ -1167,10 +1198,15 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
assert(blockEndIdx >= loadedDictEnd); assert(blockEndIdx >= loadedDictEnd);
if (blockEndIdx > loadedDictEnd + maxDist) { if (blockEndIdx > loadedDictEnd + maxDist || loadedDictEnd != window->dictLimit) {
/* On reaching window size, dictionaries are invalidated. /* On reaching window size, dictionaries are invalidated.
* For simplification, if window size is reached anywhere within next block, * For simplification, if window size is reached anywhere within next block,
* the dictionary is invalidated for the full block. * the dictionary is invalidated for the full block.
*
* We also have to invalidate the dictionary if ZSTD_window_update() has detected
* non-contiguous segments, which means that loadedDictEnd != window->dictLimit.
* loadedDictEnd may be 0, if forceWindow is true, but in that case we never use
* dictMatchState, so setting it to NULL is not a problem.
*/ */
DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
*loadedDictEndPtr = 0; *loadedDictEndPtr = 0;
@ -1302,6 +1338,42 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
#endif #endif
/* Short Cache */
/* Normally, zstd matchfinders follow this flow:
* 1. Compute hash at ip
* 2. Load index from hashTable[hash]
* 3. Check if *ip == *(base + index)
* In dictionary compression, loading *(base + index) is often an L2 or even L3 miss.
*
* Short cache is an optimization which allows us to avoid step 3 most of the time
* when the data doesn't actually match. With short cache, the flow becomes:
* 1. Compute (hash, currentTag) at ip. currentTag is an 8-bit independent hash at ip.
* 2. Load (index, matchTag) from hashTable[hash]. See ZSTD_writeTaggedIndex to understand how this works.
* 3. Only if currentTag == matchTag, check *ip == *(base + index). Otherwise, continue.
*
* Currently, short cache is only implemented in CDict hashtables. Thus, its use is limited to
* dictMatchState matchfinders.
*/
#define ZSTD_SHORT_CACHE_TAG_BITS 8
#define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1)
/* Helper function for ZSTD_fillHashTable and ZSTD_fillDoubleHashTable.
* Unpacks hashAndTag into (hash, tag), then packs (index, tag) into hashTable[hash]. */
MEM_STATIC void ZSTD_writeTaggedIndex(U32* const hashTable, size_t hashAndTag, U32 index) {
size_t const hash = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
U32 const tag = (U32)(hashAndTag & ZSTD_SHORT_CACHE_TAG_MASK);
assert(index >> (32 - ZSTD_SHORT_CACHE_TAG_BITS) == 0);
hashTable[hash] = (index << ZSTD_SHORT_CACHE_TAG_BITS) | tag;
}
/* Helper function for short cache matchfinders.
* Unpacks tag1 and tag2 from lower bits of packedTag1 and packedTag2, then checks if the tags match. */
MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) {
U32 const tag1 = packedTag1 & ZSTD_SHORT_CACHE_TAG_MASK;
U32 const tag2 = packedTag2 & ZSTD_SHORT_CACHE_TAG_MASK;
return tag1 == tag2;
}
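
To make the short-cache comment above concrete (an illustrative sketch, not code from this patch): each hash-table entry spends its low 8 bits on an independent tag of the inserted position, so a probe can reject most non-matching candidates on the tag alone, before touching the window at base + index.

    #include <assert.h>
    #include <stdint.h>

    #define TAG_BITS 8
    #define TAG_MASK ((1u << TAG_BITS) - 1)

    /* Illustrative sketch of the short-cache layout: (index, tag) packed into one 32-bit entry. */
    static uint32_t pack_tagged_index(uint32_t index, uint32_t tag)
    {
        assert((index >> (32 - TAG_BITS)) == 0);   /* index must fit in 24 bits */
        return (index << TAG_BITS) | (tag & TAG_MASK);
    }

    /* Probe: compare tags first; only on a tag match is the (expensive) byte
     * comparison at base + index worth doing. */
    static int tags_match(uint32_t packedEntry, uint32_t currentTag)
    {
        return (packedEntry & TAG_MASK) == (currentTag & TAG_MASK);
    }

    static uint32_t unpack_index(uint32_t packedEntry) { return packedEntry >> TAG_BITS; }
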
/* =============================================================== /* ===============================================================
@ -1396,4 +1468,51 @@ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
*/ */
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize); void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
* ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
* Note that the block delimiter must include the last literals of the block.
*/
size_t
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
/* Returns the number of bytes to move the current read position back by.
* Only non-zero if we ended up splitting a sequence.
* Otherwise, it may return a ZSTD error if something went wrong.
*
* This function will attempt to scan through blockSize bytes
* represented by the sequences in @inSeqs,
* storing any (partial) sequences.
*
* Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
* avoid splitting a match, or to avoid splitting a match such that it would produce a match
* smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
*/
size_t
ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
/* ===============================================================
* Deprecated definitions that are still used internally to avoid
* deprecation warnings. These functions are exactly equivalent to
* their public variants, but avoid the deprecation warnings.
* =============================================================== */
size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
#endif /* ZSTD_COMPRESS_H */ #endif /* ZSTD_COMPRESS_H */

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -13,11 +14,36 @@
***************************************/ ***************************************/
#include "zstd_compress_literals.h" #include "zstd_compress_literals.h"
/* **************************************************************
* Debug Traces
****************************************************************/
#if DEBUGLEVEL >= 2
static size_t showHexa(const void* src, size_t srcSize)
{
const BYTE* const ip = (const BYTE*)src;
size_t u;
for (u=0; u<srcSize; u++) {
RAWLOG(5, " %02X", ip[u]); (void)ip;
}
RAWLOG(5, " \n");
return srcSize;
}
#endif
/* **************************************************************
* Literals compression - special cases
****************************************************************/
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{ {
BYTE* const ostart = (BYTE*)dst; BYTE* const ostart = (BYTE*)dst;
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, ""); RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
switch(flSize) switch(flSize)
@ -36,16 +62,30 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
} }
ZSTD_memcpy(ostart + flSize, src, srcSize); ZSTD_memcpy(ostart + flSize, src, srcSize);
DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize)); DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
return srcSize + flSize; return srcSize + flSize;
} }
static int allBytesIdentical(const void* src, size_t srcSize)
{
assert(srcSize >= 1);
assert(src != NULL);
{ const BYTE b = ((const BYTE*)src)[0];
size_t p;
for (p=1; p<srcSize; p++) {
if (((const BYTE*)src)[p] != b) return 0;
}
return 1;
}
}
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{ {
BYTE* const ostart = (BYTE*)dst; BYTE* const ostart = (BYTE*)dst;
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
(void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ assert(dstCapacity >= 4); (void)dstCapacity;
assert(allBytesIdentical(src, srcSize));
switch(flSize) switch(flSize)
{ {
@ -63,28 +103,51 @@ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void*
} }
ostart[flSize] = *(const BYTE*)src; ostart[flSize] = *(const BYTE*)src;
DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1); DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
return flSize+1; return flSize+1;
} }
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, /* ZSTD_minLiteralsToCompress() :
ZSTD_hufCTables_t* nextHuf, * returns minimal amount of literals
ZSTD_strategy strategy, int disableLiteralCompression, * for literal compression to even be attempted.
void* dst, size_t dstCapacity, * Minimum is made tighter as compression strategy increases.
const void* src, size_t srcSize, */
void* entropyWorkspace, size_t entropyWorkspaceSize, static size_t
const int bmi2, ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
unsigned suspectUncompressible) {
assert((int)strategy >= 0);
assert((int)strategy <= 9);
/* btultra2 : min 8 bytes;
* then 2x larger for each successive compression strategy
* max threshold 64 bytes */
{ int const shift = MIN(9-(int)strategy, 3);
size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift;
DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
return mintc;
}
}
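
Reading the formula above off directly (illustrative check, not code from this patch): with a still-valid repeated table the threshold is 6 bytes; otherwise it is 8 << MIN(9 - strategy, 3), i.e. 64 bytes for strategy values up to 6, 32 for 7, 16 for 8 (ZSTD_btultra), and 8 for 9 (ZSTD_btultra2).

    #include <assert.h>
    #include <stddef.h>

    /* Illustrative restatement of the thresholds computed above
     * (huf_repeat_valid == previous Huffman table still usable). */
    static size_t min_literals_to_compress(int strategy, int huf_repeat_valid)
    {
        int const shift = (9 - strategy) < 3 ? (9 - strategy) : 3;
        return huf_repeat_valid ? 6 : (size_t)8 << shift;
    }

    static void min_literals_examples(void)
    {
        assert(min_literals_to_compress(9, 0) == 8);    /* ZSTD_btultra2 */
        assert(min_literals_to_compress(8, 0) == 16);   /* ZSTD_btultra  */
        assert(min_literals_to_compress(7, 0) == 32);
        assert(min_literals_to_compress(6, 0) == 64);
        assert(min_literals_to_compress(2, 1) == 6);    /* valid repeat table */
    }
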
size_t ZSTD_compressLiterals (
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
void* entropyWorkspace, size_t entropyWorkspaceSize,
const ZSTD_hufCTables_t* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy,
int disableLiteralCompression,
int suspectUncompressible,
int bmi2)
{ {
size_t const minGain = ZSTD_minGain(srcSize, strategy);
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
BYTE* const ostart = (BYTE*)dst; BYTE* const ostart = (BYTE*)dst;
U32 singleStream = srcSize < 256; U32 singleStream = srcSize < 256;
symbolEncodingType_e hType = set_compressed; symbolEncodingType_e hType = set_compressed;
size_t cLitSize; size_t cLitSize;
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)", DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",
disableLiteralCompression, (U32)srcSize); disableLiteralCompression, (U32)srcSize, dstCapacity);
DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));
/* Prepare nextEntropy assuming reusing the existing table */ /* Prepare nextEntropy assuming reusing the existing table */
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
@ -92,40 +155,51 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
if (disableLiteralCompression) if (disableLiteralCompression)
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
/* small ? don't even attempt compression (speed opt) */ /* if too small, don't even attempt compression (speed opt) */
# define COMPRESS_LITERALS_SIZE_MIN 63 if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
{ HUF_repeat repeat = prevHuf->repeatMode; { HUF_repeat repeat = prevHuf->repeatMode;
int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; int const flags = 0
| (bmi2 ? HUF_flags_bmi2 : 0)
| (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0)
| (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0)
| (suspectUncompressible ? HUF_flags_suspectUncompressible : 0);
typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);
huf_compress_f huf_compress;
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
cLitSize = singleStream ? huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
HUF_compress1X_repeat( cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,
ostart+lhSize, dstCapacity-lhSize, src, srcSize, src, srcSize,
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, HUF_SYMBOLVALUE_MAX, LitHufLog,
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible) : entropyWorkspace, entropyWorkspaceSize,
HUF_compress4X_repeat( (HUF_CElt*)nextHuf->CTable,
ostart+lhSize, dstCapacity-lhSize, src, srcSize, &repeat, flags);
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible);
if (repeat != HUF_repeat_none) { if (repeat != HUF_repeat_none) {
/* reused the existing table */ /* reused the existing table */
DEBUGLOG(5, "Reusing previous huffman table"); DEBUGLOG(5, "reusing statistics from previous huffman block");
hType = set_repeat; hType = set_repeat;
} }
} }
if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) { { size_t const minGain = ZSTD_minGain(srcSize, strategy);
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
} return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
} }
if (cLitSize==1) { if (cLitSize==1) {
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); /* A return value of 1 signals that the alphabet consists of a single symbol.
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); * However, in some rare circumstances, it could be the compressed size (a single byte).
} * For that outcome to have a chance to happen, it's necessary that `srcSize < 8`.
* (it's also necessary to not generate statistics).
* Therefore, in such a case, actively check that all bytes are identical. */
if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
} }
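For context on the check just above: allBytesIdentical() is only consulted when srcSize < 8, the one range where a HUF return value of 1 could mean either a single-symbol alphabet or a genuine 1-byte compressed output. The helper's body is not part of this hunk; a minimal sketch of what such a check does (details assumed, the real implementation may differ):

    static int allBytesIdentical(const void* src, size_t srcSize)
    {
        /* Sketch: report whether every byte of the block equals the first byte. */
        const BYTE* const s = (const BYTE*)src;
        size_t i;
        assert(srcSize >= 1);
        for (i = 1; i < srcSize; i++) {
            if (s[i] != s[0]) return 0;
        }
        return 1;
    }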
if (hType == set_compressed) { if (hType == set_compressed) {
/* using a newly constructed table */ /* using a newly constructed table */
@ -136,16 +210,19 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
switch(lhSize) switch(lhSize)
{ {
case 3: /* 2 - 2 - 10 - 10 */ case 3: /* 2 - 2 - 10 - 10 */
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
{ U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
MEM_writeLE24(ostart, lhc); MEM_writeLE24(ostart, lhc);
break; break;
} }
case 4: /* 2 - 2 - 14 - 14 */ case 4: /* 2 - 2 - 14 - 14 */
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
{ U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
MEM_writeLE32(ostart, lhc); MEM_writeLE32(ostart, lhc);
break; break;
} }
case 5: /* 2 - 2 - 18 - 18 */ case 5: /* 2 - 2 - 18 - 18 */
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
{ U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
MEM_writeLE32(ostart, lhc); MEM_writeLE32(ostart, lhc);
ostart[4] = (BYTE)(cLitSize >> 10); ostart[4] = (BYTE)(cLitSize >> 10);
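To make the header packing above concrete, here is an illustrative computation for the 3-byte (2-2-10-10) case; the sizes are invented for the example and are not taken from this patch:

    /* Single-stream block of 200 literals compressed to 120 bytes. */
    U32 const regenSize = 200;                 /* must fit in 10 bits for lhSize==3        */
    U32 const cSize     = 120;                 /* must fit in 10 bits for lhSize==3        */
    U32 const lhc = hType                      /* bits [0..1]  : literals block type       */
                  + (0u << 2)                  /* bits [2..3]  : size format, 0 = 1 stream */
                  + (regenSize << 4)           /* bits [4..13] : regenerated size          */
                  + (cSize << 14);             /* bits [14..23]: compressed size           */
    MEM_writeLE24(ostart, lhc);                /* 24 bits written as 3 little-endian bytes */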
View File
@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -16,16 +17,24 @@
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize); size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
/* ZSTD_compressRleLiteralsBlock() :
* Conditions :
* - All bytes in @src are identical
* - dstCapacity >= 4 */
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */ /* ZSTD_compressLiterals():
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, * @entropyWorkspace: must be aligned on 4-bytes boundaries
ZSTD_hufCTables_t* nextHuf, * @entropyWorkspaceSize : must be >= HUF_WORKSPACE_SIZE
ZSTD_strategy strategy, int disableLiteralCompression, * @suspectUncompressible: sampling checks, to potentially skip huffman coding
void* dst, size_t dstCapacity, */
size_t ZSTD_compressLiterals (void* dst, size_t dstCapacity,
const void* src, size_t srcSize, const void* src, size_t srcSize,
void* entropyWorkspace, size_t entropyWorkspaceSize, void* entropyWorkspace, size_t entropyWorkspaceSize,
const int bmi2, const ZSTD_hufCTables_t* prevHuf,
unsigned suspectUncompressible); ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
int suspectUncompressible,
int bmi2);
#endif /* ZSTD_COMPRESS_LITERALS_H */ #endif /* ZSTD_COMPRESS_LITERALS_H */
View File
@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -58,7 +59,7 @@ static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
{ {
/* Heuristic: This should cover most blocks <= 16K and /* Heuristic: This should cover most blocks <= 16K and
* start to fade out after 16K to about 32K depending on * start to fade out after 16K to about 32K depending on
* comprssibility. * compressibility.
*/ */
return nbSeq >= 2048; return nbSeq >= 2048;
} }
@ -166,7 +167,7 @@ ZSTD_selectEncodingType(
if (mostFrequent == nbSeq) { if (mostFrequent == nbSeq) {
*repeatMode = FSE_repeat_none; *repeatMode = FSE_repeat_none;
if (isDefaultAllowed && nbSeq <= 2) { if (isDefaultAllowed && nbSeq <= 2) {
/* Prefer set_basic over set_rle when there are 2 or less symbols, /* Prefer set_basic over set_rle when there are 2 or fewer symbols,
* since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
* If basic encoding isn't possible, always choose RLE. * If basic encoding isn't possible, always choose RLE.
*/ */
View File
@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
View File
@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -36,13 +37,14 @@
* If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block * If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block
* and the following sub-blocks' literals sections will be Treeless_Literals_Block. * and the following sub-blocks' literals sections will be Treeless_Literals_Block.
* @return : compressed size of literals section of a sub-block * @return : compressed size of literals section of a sub-block
* Or 0 if it unable to compress. * Or 0 if unable to compress.
* Or error code */ * Or error code */
static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, static size_t
const ZSTD_hufCTablesMetadata_t* hufMetadata, ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
const BYTE* literals, size_t litSize, const ZSTD_hufCTablesMetadata_t* hufMetadata,
void* dst, size_t dstSize, const BYTE* literals, size_t litSize,
const int bmi2, int writeEntropy, int* entropyWritten) void* dst, size_t dstSize,
const int bmi2, int writeEntropy, int* entropyWritten)
{ {
size_t const header = writeEntropy ? 200 : 0; size_t const header = writeEntropy ? 200 : 0;
size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header)); size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
@ -53,8 +55,6 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat; symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
size_t cLitSize = 0; size_t cLitSize = 0;
(void)bmi2; /* TODO bmi2... */
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy); DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
*entropyWritten = 0; *entropyWritten = 0;
@ -76,9 +76,9 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize); DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
} }
/* TODO bmi2 */ { int const flags = bmi2 ? HUF_flags_bmi2 : 0;
{ const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable) const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable, flags)
: HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable); : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable, flags);
op += cSize; op += cSize;
cLitSize += cSize; cLitSize += cSize;
if (cSize == 0 || ERR_isError(cSize)) { if (cSize == 0 || ERR_isError(cSize)) {
@ -126,7 +126,11 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
return op-ostart; return op-ostart;
} }
static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) { static size_t
ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
const seqDef* sequences, size_t nbSeq,
size_t litSize, int lastSequence)
{
const seqDef* const sstart = sequences; const seqDef* const sstart = sequences;
const seqDef* const send = sequences + nbSeq; const seqDef* const send = sequences + nbSeq;
const seqDef* sp = sstart; const seqDef* sp = sstart;
@ -156,13 +160,14 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef*
* @return : compressed size of sequences section of a sub-block * @return : compressed size of sequences section of a sub-block
* Or 0 if it is unable to compress * Or 0 if it is unable to compress
* Or error code. */ * Or error code. */
static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables, static size_t
const ZSTD_fseCTablesMetadata_t* fseMetadata, ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
const seqDef* sequences, size_t nbSeq, const ZSTD_fseCTablesMetadata_t* fseMetadata,
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, const seqDef* sequences, size_t nbSeq,
const ZSTD_CCtx_params* cctxParams, const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
void* dst, size_t dstCapacity, const ZSTD_CCtx_params* cctxParams,
const int bmi2, int writeEntropy, int* entropyWritten) void* dst, size_t dstCapacity,
const int bmi2, int writeEntropy, int* entropyWritten)
{ {
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
BYTE* const ostart = (BYTE*)dst; BYTE* const ostart = (BYTE*)dst;
@ -539,7 +544,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
repcodes_t rep; repcodes_t rep;
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep)); ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
for (seq = sstart; seq < sp; ++seq) { for (seq = sstart; seq < sp; ++seq) {
ZSTD_updateRep(rep.rep, seq->offBase - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
} }
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep)); ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
} }
View File
@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
View File
@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -14,7 +15,9 @@
/*-************************************* /*-*************************************
* Dependencies * Dependencies
***************************************/ ***************************************/
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
#include "../common/zstd_internal.h" #include "../common/zstd_internal.h"
#include "../common/portability_macros.h"
/*-************************************* /*-*************************************
@ -41,8 +44,9 @@
***************************************/ ***************************************/
typedef enum { typedef enum {
ZSTD_cwksp_alloc_objects, ZSTD_cwksp_alloc_objects,
ZSTD_cwksp_alloc_buffers, ZSTD_cwksp_alloc_aligned_init_once,
ZSTD_cwksp_alloc_aligned ZSTD_cwksp_alloc_aligned,
ZSTD_cwksp_alloc_buffers
} ZSTD_cwksp_alloc_phase_e; } ZSTD_cwksp_alloc_phase_e;
/* /*
@ -95,8 +99,8 @@ typedef enum {
* *
* Workspace Layout: * Workspace Layout:
* *
* [ ... workspace ... ] * [ ... workspace ... ]
* [objects][tables ... ->] free space [<- ... aligned][<- ... buffers] * [objects][tables ->] free space [<- buffers][<- aligned][<- init once]
* *
* The various objects that live in the workspace are divided into the * The various objects that live in the workspace are divided into the
* following categories, and are allocated separately: * following categories, and are allocated separately:
@ -120,9 +124,18 @@ typedef enum {
* uint32_t arrays, all of whose values are between 0 and (nextSrc - base). * uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
* Their sizes depend on the cparams. These tables are 64-byte aligned. * Their sizes depend on the cparams. These tables are 64-byte aligned.
* *
* - Aligned: these buffers are used for various purposes that require 4 byte * - Init once: these buffers require to be initialized at least once before
* alignment, but don't require any initialization before they're used. These * use. They should be used when we want to skip memory initialization
* buffers are each aligned to 64 bytes. * while not triggering memory checkers (like Valgrind) when reading from
 *   this memory without writing to it first.
* These buffers should be used carefully as they might contain data
* from previous compressions.
* Buffers are aligned to 64 bytes.
*
* - Aligned: these buffers don't require any initialization before they're
* used. The user of the buffer should make sure they write into a buffer
* location before reading from it.
* Buffers are aligned to 64 bytes.
* *
* - Buffers: these buffers are used for various purposes that don't require * - Buffers: these buffers are used for various purposes that don't require
* any alignment or initialization before they're used. This means they can * any alignment or initialization before they're used. This means they can
@ -134,8 +147,9 @@ typedef enum {
* correctly packed into the workspace buffer. That order is: * correctly packed into the workspace buffer. That order is:
* *
* 1. Objects * 1. Objects
* 2. Buffers * 2. Init once / Tables
* 3. Aligned/Tables * 3. Aligned / Tables
* 4. Buffers / Tables
* *
* Attempts to reserve objects of different types out of order will fail. * Attempts to reserve objects of different types out of order will fail.
*/ */
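As a reading aid for the ordering rule above, a hedged usage sketch with the reserve helpers declared later in this header (ws and all sizes are placeholders; the object phase is omitted):

    U32*  const table  = (U32*)ZSTD_cwksp_reserve_table(ws, tableBytes);        /* 2. tables    */
    void* const once   = ZSTD_cwksp_reserve_aligned_init_once(ws, onceBytes);   /* 2. init once */
    void* const algn   = ZSTD_cwksp_reserve_aligned(ws, alignedBytes);          /* 3. aligned   */
    BYTE* const buffer = ZSTD_cwksp_reserve_buffer(ws, bufferBytes);            /* 4. buffers   */
    /* Requesting another init-once or aligned region after the buffers phase
     * would fail: as stated above, the allocation phase only moves forward. */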
@ -147,6 +161,7 @@ typedef struct {
void* tableEnd; void* tableEnd;
void* tableValidEnd; void* tableValidEnd;
void* allocStart; void* allocStart;
void* initOnceStart;
BYTE allocFailed; BYTE allocFailed;
int workspaceOversizedDuration; int workspaceOversizedDuration;
@ -159,6 +174,7 @@ typedef struct {
***************************************/ ***************************************/
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws); MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws);
MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) { MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
(void)ws; (void)ws;
@ -168,6 +184,8 @@ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
assert(ws->tableEnd <= ws->allocStart); assert(ws->tableEnd <= ws->allocStart);
assert(ws->tableValidEnd <= ws->allocStart); assert(ws->tableValidEnd <= ws->allocStart);
assert(ws->allocStart <= ws->workspaceEnd); assert(ws->allocStart <= ws->workspaceEnd);
assert(ws->initOnceStart <= ZSTD_cwksp_initialAllocStart(ws));
assert(ws->workspace <= ws->initOnceStart);
} }
/* /*
@ -210,14 +228,10 @@ MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
* for internal purposes (currently only alignment). * for internal purposes (currently only alignment).
*/ */
MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) { MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
/* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes /* For alignment, the wksp will always allocate an additional 2*ZSTD_CWKSP_ALIGNMENT_BYTES
* to align the beginning of tables section, as well as another n_2=[0, 63] bytes * bytes to align the beginning of tables section and end of buffers;
* to align the beginning of the aligned section.
*
* n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and
* aligneds being sized in multiples of 64 bytes.
*/ */
size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES; size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES * 2;
return slackSpace; return slackSpace;
} }
@ -230,10 +244,18 @@ MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignByt
size_t const alignBytesMask = alignBytes - 1; size_t const alignBytesMask = alignBytes - 1;
size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask; size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
assert((alignBytes & alignBytesMask) == 0); assert((alignBytes & alignBytesMask) == 0);
assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES); assert(bytes < alignBytes);
return bytes; return bytes;
} }
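A quick worked example of the computation above (illustrative values):

    /* With ZSTD_CWKSP_ALIGNMENT_BYTES == 64:
     *   ptr whose low six bits are 0x07  -> bytes = (64 - 7) & 63 = 57
     *   ptr already 64-byte aligned      -> bytes = (64 - 0) & 63 = 0
     * so the result always lies in [0, 63], matching assert(bytes < alignBytes). */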
/*
* Returns the initial value for allocStart which is used to determine the position from
* which we can allocate from the end of the workspace.
*/
MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws) {
return (void*)((size_t)ws->workspaceEnd & ~(ZSTD_CWKSP_ALIGNMENT_BYTES-1));
}
/* /*
* Internal function. Do not use directly. * Internal function. Do not use directly.
* Reserves the given number of bytes within the aligned/buffer segment of the wksp, * Reserves the given number of bytes within the aligned/buffer segment of the wksp,
@ -274,27 +296,16 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
{ {
assert(phase >= ws->phase); assert(phase >= ws->phase);
if (phase > ws->phase) { if (phase > ws->phase) {
/* Going from allocating objects to allocating buffers */ /* Going from allocating objects to allocating initOnce / tables */
if (ws->phase < ZSTD_cwksp_alloc_buffers && if (ws->phase < ZSTD_cwksp_alloc_aligned_init_once &&
phase >= ZSTD_cwksp_alloc_buffers) { phase >= ZSTD_cwksp_alloc_aligned_init_once) {
ws->tableValidEnd = ws->objectEnd; ws->tableValidEnd = ws->objectEnd;
} ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
/* Going from allocating buffers to allocating aligneds/tables */
if (ws->phase < ZSTD_cwksp_alloc_aligned &&
phase >= ZSTD_cwksp_alloc_aligned) {
{ /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */
size_t const bytesToAlign =
ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES);
DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign);
ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */
RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign),
memory_allocation, "aligned phase - alignment initial allocation failed!");
}
{ /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */ { /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
void* const alloc = ws->objectEnd; void *const alloc = ws->objectEnd;
size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES); size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
void* const objectEnd = (BYTE*)alloc + bytesToAlign; void *const objectEnd = (BYTE *) alloc + bytesToAlign;
DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign); DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation, RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation,
"table phase - alignment initial allocation failed!"); "table phase - alignment initial allocation failed!");
@ -302,7 +313,9 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
ws->tableEnd = objectEnd; /* table area starts being empty */ ws->tableEnd = objectEnd; /* table area starts being empty */
if (ws->tableValidEnd < ws->tableEnd) { if (ws->tableValidEnd < ws->tableEnd) {
ws->tableValidEnd = ws->tableEnd; ws->tableValidEnd = ws->tableEnd;
} } } }
}
}
ws->phase = phase; ws->phase = phase;
ZSTD_cwksp_assert_internal_consistency(ws); ZSTD_cwksp_assert_internal_consistency(ws);
} }
@ -314,7 +327,7 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
*/ */
MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr)
{ {
return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); return (ptr != NULL) && (ws->workspace <= ptr) && (ptr < ws->workspaceEnd);
} }
/* /*
@ -343,6 +356,33 @@ MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes)
return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers); return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
} }
/*
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
* This memory has been initialized at least once in the past.
* This doesn't mean it has been initialized this time, and it might contain data from previous
* operations.
* The main usage is for algorithms that might need read access into uninitialized memory.
* The algorithm must maintain safety under these conditions and must make sure it doesn't
* leak any of the past data (directly or in side channels).
*/
MEM_STATIC void* ZSTD_cwksp_reserve_aligned_init_once(ZSTD_cwksp* ws, size_t bytes)
{
size_t const alignedBytes = ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES);
void* ptr = ZSTD_cwksp_reserve_internal(ws, alignedBytes, ZSTD_cwksp_alloc_aligned_init_once);
assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
if(ptr && ptr < ws->initOnceStart) {
/* We assume the memory following the current allocation is either:
* 1. Not usable as initOnce memory (end of workspace)
* 2. Another initOnce buffer that has been allocated before (and so was previously memset)
* 3. An ASAN redzone, in which case we don't want to write on it
* For these reasons it should be fine to not explicitly zero every byte up to ws->initOnceStart.
 * Note that we assume here that MSAN and ASAN cannot run at the same time. */
ZSTD_memset(ptr, 0, MIN((size_t)((U8*)ws->initOnceStart - (U8*)ptr), alignedBytes));
ws->initOnceStart = ptr;
}
return ptr;
}
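A hedged usage sketch for the init-once reservation above (all names below are hypothetical): the first reservation hands back zeroed memory, later reservations from a reused workspace may hand back stale contents, so a consumer that reads before writing must validate whatever it finds:

    U16* const tagTable = (U16*)ZSTD_cwksp_reserve_aligned_init_once(ws, tagTableBytes);
    if (tagTable != NULL) {
        U16 const entry = tagTable[slot];      /* may be 0 (first use) or a leftover value */
        if (entryLooksValid(entry)) {          /* hypothetical validation                  */
            /* ... use the entry ... */
        }
        tagTable[slot] = freshEntry;           /* inserts always overwrite                  */
    }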
/* /*
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes). * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
*/ */
@ -361,13 +401,17 @@ MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
*/ */
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
{ {
const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned; const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned_init_once;
void* alloc; void* alloc;
void* end; void* end;
void* top; void* top;
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) { /* We can only start allocating tables after we are done reserving space for objects at the
return NULL; * start of the workspace */
if(ws->phase < phase) {
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
return NULL;
}
} }
alloc = ws->tableEnd; alloc = ws->tableEnd;
end = (BYTE *)alloc + bytes; end = (BYTE *)alloc + bytes;
@ -451,7 +495,7 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
assert(ws->tableValidEnd >= ws->objectEnd); assert(ws->tableValidEnd >= ws->objectEnd);
assert(ws->tableValidEnd <= ws->allocStart); assert(ws->tableValidEnd <= ws->allocStart);
if (ws->tableValidEnd < ws->tableEnd) { if (ws->tableValidEnd < ws->tableEnd) {
ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); ZSTD_memset(ws->tableValidEnd, 0, (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd));
} }
ZSTD_cwksp_mark_tables_clean(ws); ZSTD_cwksp_mark_tables_clean(ws);
} }
@ -478,10 +522,10 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
ws->tableEnd = ws->objectEnd; ws->tableEnd = ws->objectEnd;
ws->allocStart = ws->workspaceEnd; ws->allocStart = ZSTD_cwksp_initialAllocStart(ws);
ws->allocFailed = 0; ws->allocFailed = 0;
if (ws->phase > ZSTD_cwksp_alloc_buffers) { if (ws->phase > ZSTD_cwksp_alloc_aligned_init_once) {
ws->phase = ZSTD_cwksp_alloc_buffers; ws->phase = ZSTD_cwksp_alloc_aligned_init_once;
} }
ZSTD_cwksp_assert_internal_consistency(ws); ZSTD_cwksp_assert_internal_consistency(ws);
} }
@ -498,6 +542,7 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_c
ws->workspaceEnd = (BYTE*)start + size; ws->workspaceEnd = (BYTE*)start + size;
ws->objectEnd = ws->workspace; ws->objectEnd = ws->workspace;
ws->tableValidEnd = ws->objectEnd; ws->tableValidEnd = ws->objectEnd;
ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
ws->phase = ZSTD_cwksp_alloc_objects; ws->phase = ZSTD_cwksp_alloc_objects;
ws->isStatic = isStatic; ws->isStatic = isStatic;
ZSTD_cwksp_clear(ws); ZSTD_cwksp_clear(ws);
@ -550,17 +595,11 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
* Returns if the estimated space needed for a wksp is within an acceptable limit of the * Returns if the estimated space needed for a wksp is within an acceptable limit of the
* actual amount of space used. * actual amount of space used.
*/ */
MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws, MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp *const ws, size_t const estimatedSpace) {
size_t const estimatedSpace, int resizedWorkspace) { /* We have an alignment space between objects and tables between tables and buffers, so we can have up to twice
if (resizedWorkspace) { * the alignment bytes difference between estimation and actual usage */
/* Resized/newly allocated wksp should have exact bounds */ return (estimatedSpace - ZSTD_cwksp_slack_space_required()) <= ZSTD_cwksp_used(ws) &&
return ZSTD_cwksp_used(ws) == estimatedSpace; ZSTD_cwksp_used(ws) <= estimatedSpace;
} else {
/* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes
* than estimatedSpace. See the comments in zstd_cwksp.h for details.
*/
return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63);
}
} }
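For a sense of scale of the new bound above (illustrative numbers):

    /* With ZSTD_CWKSP_ALIGNMENT_BYTES == 64, the slack is 2 * 64 = 128 bytes, so for
     * an estimate of, say, 4096 bytes the actual usage must fall within [3968, 4096]. */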
View File
@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -11,8 +12,43 @@
#include "zstd_compress_internal.h" #include "zstd_compress_internal.h"
#include "zstd_double_fast.h" #include "zstd_double_fast.h"
static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashLarge = ms->hashTable;
U32 const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
U32 const mls = cParams->minMatch;
U32* const hashSmall = ms->chainTable;
U32 const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
const BYTE* const base = ms->window.base;
const BYTE* ip = base + ms->nextToUpdate;
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
const U32 fastHashFillStep = 3;
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, /* Always insert every fastHashFillStep position into the hash tables.
* Insert the other positions into the large hash table if their entry
* is empty.
*/
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
U32 const curr = (U32)(ip - base);
U32 i;
for (i = 0; i < fastHashFillStep; ++i) {
size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
if (i == 0) {
ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
}
if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
}
/* Only load extra positions for ZSTD_dtlm_full */
if (dtlm == ZSTD_dtlm_fast)
break;
} }
}
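The CDict fill path above stores tagged entries: the hash is computed with ZSTD_SHORT_CACHE_TAG_BITS extra bits, the high bits select the bucket and the low bits are kept as a short tag packed next to the position, so later probes can reject most misses without touching dictionary memory. The helpers themselves are not defined in this hunk; a hedged sketch of the packing they imply (layout assumed, the real definitions may differ):

    static void writeTaggedIndex_sketch(U32* table, size_t hashAndTag, U32 index)
    {
        size_t const bucket = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
        U32 const tag = (U32)hashAndTag & ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1);
        table[bucket] = (index << ZSTD_SHORT_CACHE_TAG_BITS) | tag;  /* position + tag in one U32 */
    }
    static int comparePackedTags_sketch(size_t packed1, size_t packed2)
    {
        U32 const mask = (1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1;
        return ((U32)packed1 & mask) == ((U32)packed2 & mask);       /* cheap pre-filter */
    }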
static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm) void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{ {
const ZSTD_compressionParameters* const cParams = &ms->cParams; const ZSTD_compressionParameters* const cParams = &ms->cParams;
@ -43,7 +79,19 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
/* Only load extra positions for ZSTD_dtlm_full */ /* Only load extra positions for ZSTD_dtlm_full */
if (dtlm == ZSTD_dtlm_fast) if (dtlm == ZSTD_dtlm_fast)
break; break;
} } } }
}
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp)
{
if (tfp == ZSTD_tfp_forCDict) {
ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
} else {
ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
}
} }
@ -67,7 +115,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
const BYTE* const iend = istart + srcSize; const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE; const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1]; U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0; U32 offsetSaved1 = 0, offsetSaved2 = 0;
size_t mLength; size_t mLength;
U32 offset; U32 offset;
@ -100,8 +148,8 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
U32 const current = (U32)(ip - base); U32 const current = (U32)(ip - base);
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
U32 const maxRep = current - windowLow; U32 const maxRep = current - windowLow;
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
} }
/* Outer Loop: one iteration per match found and stored */ /* Outer Loop: one iteration per match found and stored */
@ -131,7 +179,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++; ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
goto _match_stored; goto _match_stored;
} }
@ -175,9 +223,13 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
} while (ip1 <= ilimit); } while (ip1 <= ilimit);
_cleanup: _cleanup:
/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
* rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
/* save reps for next block */ /* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved; rep[0] = offset_1 ? offset_1 : offsetSaved1;
rep[1] = offset_2 ? offset_2 : offsetSaved; rep[1] = offset_2 ? offset_2 : offsetSaved2;
/* Return the last literals size */ /* Return the last literals size */
return (size_t)(iend - anchor); return (size_t)(iend - anchor);
@ -217,7 +269,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
hashLong[hl1] = (U32)(ip1 - base); hashLong[hl1] = (U32)(ip1 - base);
} }
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
_match_stored: _match_stored:
/* match found */ /* match found */
@ -243,7 +295,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, rLength); ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
ip += rLength; ip += rLength;
anchor = ip; anchor = ip;
continue; /* faster when present ... (?) */ continue; /* faster when present ... (?) */
@ -275,7 +327,6 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
const BYTE* const iend = istart + srcSize; const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE; const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1]; U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;
const ZSTD_matchState_t* const dms = ms->dictMatchState; const ZSTD_matchState_t* const dms = ms->dictMatchState;
const ZSTD_compressionParameters* const dictCParams = &dms->cParams; const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
@ -286,8 +337,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
const BYTE* const dictStart = dictBase + dictStartIndex; const BYTE* const dictStart = dictBase + dictStartIndex;
const BYTE* const dictEnd = dms->window.nextSrc; const BYTE* const dictEnd = dms->window.nextSrc;
const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase); const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase);
const U32 dictHBitsL = dictCParams->hashLog; const U32 dictHBitsL = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
const U32 dictHBitsS = dictCParams->chainLog; const U32 dictHBitsS = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic"); DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
@ -295,6 +346,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
/* if a dictionary is attached, it must be within window range */ /* if a dictionary is attached, it must be within window range */
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
if (ms->prefetchCDictTables) {
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
PREFETCH_AREA(dictHashLong, hashTableBytes)
PREFETCH_AREA(dictHashSmall, chainTableBytes)
}
/* init */ /* init */
ip += (dictAndPrefixLength == 0); ip += (dictAndPrefixLength == 0);
@ -309,8 +367,12 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
U32 offset; U32 offset;
size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8); size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls); size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
U32 const curr = (U32)(ip-base); U32 const curr = (U32)(ip-base);
U32 const matchIndexL = hashLong[h2]; U32 const matchIndexL = hashLong[h2];
U32 matchIndexS = hashSmall[h]; U32 matchIndexS = hashSmall[h];
@ -328,7 +390,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
ip++; ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
goto _match_stored; goto _match_stored;
} }
@ -340,9 +402,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
goto _match_found; goto _match_found;
} }
} else { } else if (dictTagsMatchL) {
/* check dictMatchState long match */ /* check dictMatchState long match */
U32 const dictMatchIndexL = dictHashLong[dictHL]; U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
const BYTE* dictMatchL = dictBase + dictMatchIndexL; const BYTE* dictMatchL = dictBase + dictMatchIndexL;
assert(dictMatchL < dictEnd); assert(dictMatchL < dictEnd);
@ -358,9 +420,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
if (MEM_read32(match) == MEM_read32(ip)) { if (MEM_read32(match) == MEM_read32(ip)) {
goto _search_next_long; goto _search_next_long;
} }
} else { } else if (dictTagsMatchS) {
/* check dictMatchState short match */ /* check dictMatchState short match */
U32 const dictMatchIndexS = dictHashSmall[dictHS]; U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
match = dictBase + dictMatchIndexS; match = dictBase + dictMatchIndexS;
matchIndexS = dictMatchIndexS + dictIndexDelta; matchIndexS = dictMatchIndexS + dictIndexDelta;
@ -375,10 +437,11 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
continue; continue;
_search_next_long: _search_next_long:
{ size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
U32 const matchIndexL3 = hashLong[hl3]; U32 const matchIndexL3 = hashLong[hl3];
U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
const BYTE* matchL3 = base + matchIndexL3; const BYTE* matchL3 = base + matchIndexL3;
hashLong[hl3] = curr + 1; hashLong[hl3] = curr + 1;
@ -391,9 +454,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
goto _match_found; goto _match_found;
} }
} else { } else if (dictTagsMatchL3) {
/* check dict long +1 match */ /* check dict long +1 match */
U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
assert(dictMatchL3 < dictEnd); assert(dictMatchL3 < dictEnd);
if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
@ -419,7 +482,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
offset_2 = offset_1; offset_2 = offset_1;
offset_1 = offset; offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
_match_stored: _match_stored:
/* match found */ /* match found */
@ -448,7 +511,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2; ip += repLength2;
@ -461,8 +524,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
} /* while (ip < ilimit) */ } /* while (ip < ilimit) */
/* save reps for next block */ /* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved; rep[0] = offset_1;
rep[1] = offset_2 ? offset_2 : offsetSaved; rep[1] = offset_2;
/* Return the last literals size */ /* Return the last literals size */
return (size_t)(iend - anchor); return (size_t)(iend - anchor);
@ -585,7 +648,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++; ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
} else { } else {
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@ -596,7 +659,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
offset_2 = offset_1; offset_2 = offset_1;
offset_1 = offset; offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@ -621,7 +684,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
} }
offset_2 = offset_1; offset_2 = offset_1;
offset_1 = offset; offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
} else { } else {
ip += ((ip-anchor) >> kSearchStrength) + 1; ip += ((ip-anchor) >> kSearchStrength) + 1;
@ -653,7 +716,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2; ip += repLength2;
View File
@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -16,7 +17,8 @@
#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */ #include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm); void const* end, ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp);
size_t ZSTD_compressBlock_doubleFast( size_t ZSTD_compressBlock_doubleFast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);
View File
@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -11,8 +12,42 @@
#include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */ #include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
#include "zstd_fast.h" #include "zstd_fast.h"
static void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashTable = ms->hashTable;
U32 const hBits = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
U32 const mls = cParams->minMatch;
const BYTE* const base = ms->window.base;
const BYTE* ip = base + ms->nextToUpdate;
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
const U32 fastHashFillStep = 3;
void ZSTD_fillHashTable(ZSTD_matchState_t* ms, /* Currently, we always use ZSTD_dtlm_full for filling CDict tables.
* Feel free to remove this assert if there's a good reason! */
assert(dtlm == ZSTD_dtlm_full);
/* Always insert every fastHashFillStep position into the hash table.
* Insert the other positions if their hash entry is empty.
*/
for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
U32 const curr = (U32)(ip - base);
{ size_t const hashAndTag = ZSTD_hashPtr(ip, hBits, mls);
ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr); }
if (dtlm == ZSTD_dtlm_fast) continue;
/* Only load extra positions for ZSTD_dtlm_full */
{ U32 p;
for (p = 1; p < fastHashFillStep; ++p) {
size_t const hashAndTag = ZSTD_hashPtr(ip + p, hBits, mls);
if (hashTable[hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) { /* not yet filled */
ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr + p);
} } } }
}
static void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
const void* const end, const void* const end,
ZSTD_dictTableLoadMethod_e dtlm) ZSTD_dictTableLoadMethod_e dtlm)
{ {
@ -25,6 +60,10 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
const U32 fastHashFillStep = 3; const U32 fastHashFillStep = 3;
/* Currently, we always use ZSTD_dtlm_fast for filling CCtx tables.
* Feel free to remove this assert if there's a good reason! */
assert(dtlm == ZSTD_dtlm_fast);
/* Always insert every fastHashFillStep position into the hash table. /* Always insert every fastHashFillStep position into the hash table.
* Insert the other positions if their hash entry is empty. * Insert the other positions if their hash entry is empty.
*/ */
@ -42,6 +81,18 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
} } } } } } } }
} }
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp)
{
if (tfp == ZSTD_tfp_forCDict) {
ZSTD_fillHashTableForCDict(ms, end, dtlm);
} else {
ZSTD_fillHashTableForCCtx(ms, end, dtlm);
}
}
/* /*
* If you squint hard enough (and ignore repcodes), the search operation at any * If you squint hard enough (and ignore repcodes), the search operation at any
@ -117,7 +168,7 @@ ZSTD_compressBlock_fast_noDict_generic(
U32 rep_offset1 = rep[0]; U32 rep_offset1 = rep[0];
U32 rep_offset2 = rep[1]; U32 rep_offset2 = rep[1];
U32 offsetSaved = 0; U32 offsetSaved1 = 0, offsetSaved2 = 0;
size_t hash0; /* hash for ip0 */ size_t hash0; /* hash for ip0 */
size_t hash1; /* hash for ip1 */ size_t hash1; /* hash for ip1 */
@ -141,8 +192,8 @@ ZSTD_compressBlock_fast_noDict_generic(
{ U32 const curr = (U32)(ip0 - base); { U32 const curr = (U32)(ip0 - base);
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
U32 const maxRep = curr - windowLow; U32 const maxRep = curr - windowLow;
if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0; if (rep_offset2 > maxRep) offsetSaved2 = rep_offset2, rep_offset2 = 0;
if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0; if (rep_offset1 > maxRep) offsetSaved1 = rep_offset1, rep_offset1 = 0;
} }
/* start each op */ /* start each op */
@ -180,8 +231,14 @@ ZSTD_compressBlock_fast_noDict_generic(
mLength = ip0[-1] == match0[-1]; mLength = ip0[-1] == match0[-1];
ip0 -= mLength; ip0 -= mLength;
match0 -= mLength; match0 -= mLength;
offcode = STORE_REPCODE_1; offcode = REPCODE1_TO_OFFBASE;
mLength += 4; mLength += 4;
/* First write next hash table entry; we've already calculated it.
* This write is known to be safe because the ip1 is before the
* repcode (ip2). */
hashTable[hash1] = (U32)(ip1 - base);
goto _match; goto _match;
} }
@ -195,6 +252,12 @@ ZSTD_compressBlock_fast_noDict_generic(
/* check match at ip[0] */ /* check match at ip[0] */
if (MEM_read32(ip0) == mval) { if (MEM_read32(ip0) == mval) {
/* found a match! */ /* found a match! */
/* First write next hash table entry; we've already calculated it.
* This write is known to be safe because the ip1 == ip0 + 1, so
* we know we will resume searching after ip1 */
hashTable[hash1] = (U32)(ip1 - base);
goto _offset; goto _offset;
} }
@ -224,6 +287,21 @@ ZSTD_compressBlock_fast_noDict_generic(
/* check match at ip[0] */ /* check match at ip[0] */
if (MEM_read32(ip0) == mval) { if (MEM_read32(ip0) == mval) {
/* found a match! */ /* found a match! */
/* first write next hash table entry; we've already calculated it */
if (step <= 4) {
/* We need to avoid writing an index into the hash table >= the
* position at which we will pick up our searching after we've
* taken this match.
*
* The minimum possible match has length 4, so the earliest ip0
* can be after we take this match will be the current ip0 + 4.
* ip1 is ip0 + step - 1. If ip1 is >= ip0 + 4, we can't safely
* write this position.
*/
hashTable[hash1] = (U32)(ip1 - base);
}
goto _offset; goto _offset;
} }
@ -254,9 +332,24 @@ ZSTD_compressBlock_fast_noDict_generic(
* However, it seems to be a meaningful performance hit to try to search * However, it seems to be a meaningful performance hit to try to search
* them. So let's not. */ * them. So let's not. */
/* When the repcodes are outside of the prefix, we set them to zero before the loop.
* When the offsets are still zero, we need to restore them after the block to have a correct
* repcode history. If only one offset was invalid, it is easy. The tricky case is when both
* offsets were invalid. We need to figure out which offset to refill with.
* - If both offsets are zero they are in the same order.
* - If both offsets are non-zero, we won't restore the offsets from `offsetSaved[12]`.
* - If only one is zero, we need to decide which offset to restore.
* - If rep_offset1 is non-zero, then rep_offset2 must be offsetSaved1.
* - It is impossible for rep_offset2 to be non-zero.
*
* So if rep_offset1 started invalid (offsetSaved1 != 0) and became valid (rep_offset1 != 0), then
* set rep[0] = rep_offset1 and rep[1] = offsetSaved1.
*/
offsetSaved2 = ((offsetSaved1 != 0) && (rep_offset1 != 0)) ? offsetSaved1 : offsetSaved2;
/* save reps for next block */ /* save reps for next block */
rep[0] = rep_offset1 ? rep_offset1 : offsetSaved; rep[0] = rep_offset1 ? rep_offset1 : offsetSaved1;
rep[1] = rep_offset2 ? rep_offset2 : offsetSaved; rep[1] = rep_offset2 ? rep_offset2 : offsetSaved2;
/* Return the last literals size */ /* Return the last literals size */
return (size_t)(iend - anchor); return (size_t)(iend - anchor);
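A concrete trace of the restoration rule above (values invented for the example): suppose the block starts with rep = {5, 3} but both offsets point outside the window, so they are cleared with offsetSaved1 = 5 and offsetSaved2 = 3. If a later match sets rep_offset1 = 7 and rep_offset2 is still 0 at the end of the block, then:

    /* offsetSaved2 = ((offsetSaved1 != 0) && (rep_offset1 != 0)) ? offsetSaved1 : offsetSaved2;
     * -> offsetSaved2 becomes 5, so the block hands back rep[0] = 7, rep[1] = 5:
     *    the newly found offset followed by the most recently valid old one. */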
@ -267,7 +360,7 @@ ZSTD_compressBlock_fast_noDict_generic(
match0 = base + idx; match0 = base + idx;
rep_offset2 = rep_offset1; rep_offset2 = rep_offset1;
rep_offset1 = (U32)(ip0-match0); rep_offset1 = (U32)(ip0-match0);
offcode = STORE_OFFSET(rep_offset1); offcode = OFFSET_TO_OFFBASE(rep_offset1);
mLength = 4; mLength = 4;
/* Count the backwards match length. */ /* Count the backwards match length. */
@ -287,11 +380,6 @@ ZSTD_compressBlock_fast_noDict_generic(
ip0 += mLength; ip0 += mLength;
anchor = ip0; anchor = ip0;
/* write next hash table entry */
if (ip1 < ip0) {
hashTable[hash1] = (U32)(ip1 - base);
}
/* Fill table and check for immediate repcode. */ /* Fill table and check for immediate repcode. */
if (ip0 <= ilimit) { if (ip0 <= ilimit) {
/* Fill Table */ /* Fill Table */
@ -306,7 +394,7 @@ ZSTD_compressBlock_fast_noDict_generic(
{ U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */ { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
ip0 += rLength; ip0 += rLength;
ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength); ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
anchor = ip0; anchor = ip0;
continue; /* faster when present (confirmed on gcc-8) ... (?) */ continue; /* faster when present (confirmed on gcc-8) ... (?) */
} } } } } }
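
The STORE_OFFSET/STORE_REPCODE_1 to OFFSET_TO_OFFBASE/REPCODE1_TO_OFFBASE renames above reflect the single "offBase" sumtype that ZSTD_storeSeq() consumes: small values name repcodes, larger values encode a real match offset shifted past the repcode range. A minimal standalone sketch of that encoding follows (illustrative only; the helper names are hypothetical and REP_NUM == 3 is an assumption mirroring ZSTD_REP_NUM, not a definition from this hunk):

/* Illustrative sketch, not kernel code: a plausible model of the offBase sumtype. */
#include <assert.h>
#include <stdio.h>

#define REP_NUM 3  /* assumed to match ZSTD_REP_NUM */

static unsigned offset_to_offbase(unsigned offset)  { assert(offset > 0); return offset + REP_NUM; }
static unsigned repcode_to_offbase(unsigned rep)    { assert(rep >= 1 && rep <= REP_NUM); return rep; }
static int      offbase_is_repcode(unsigned ob)     { return ob >= 1 && ob <= REP_NUM; }
static unsigned offbase_to_offset(unsigned ob)      { assert(ob > REP_NUM); return ob - REP_NUM; }

int main(void)
{
    unsigned ob1 = repcode_to_offbase(1);   /* what REPCODE1_TO_OFFBASE stands for */
    unsigned ob2 = offset_to_offbase(1024); /* a real match offset of 1024 bytes */
    printf("repcode? %d %d\n", offbase_is_repcode(ob1), offbase_is_repcode(ob2));
    printf("decoded offset: %u\n", offbase_to_offset(ob2));
    return 0;
}
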
@ -380,14 +468,14 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
U32 const stepSize = cParams->targetLength + !(cParams->targetLength); U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
const BYTE* const base = ms->window.base; const BYTE* const base = ms->window.base;
const BYTE* const istart = (const BYTE*)src; const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart; const BYTE* ip0 = istart;
const BYTE* ip1 = ip0 + stepSize; /* we assert below that stepSize >= 1 */
const BYTE* anchor = istart; const BYTE* anchor = istart;
const U32 prefixStartIndex = ms->window.dictLimit; const U32 prefixStartIndex = ms->window.dictLimit;
const BYTE* const prefixStart = base + prefixStartIndex; const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const iend = istart + srcSize; const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE; const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1]; U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;
const ZSTD_matchState_t* const dms = ms->dictMatchState; const ZSTD_matchState_t* const dms = ms->dictMatchState;
const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
@ -397,13 +485,13 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const BYTE* const dictStart = dictBase + dictStartIndex; const BYTE* const dictStart = dictBase + dictStartIndex;
const BYTE* const dictEnd = dms->window.nextSrc; const BYTE* const dictEnd = dms->window.nextSrc;
const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase);
const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); const U32 dictAndPrefixLength = (U32)(istart - prefixStart + dictEnd - dictStart);
const U32 dictHLog = dictCParams->hashLog; const U32 dictHBits = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
/* if a dictionary is still attached, it necessarily means that /* if a dictionary is still attached, it necessarily means that
* it is within window size. So we just check it. */ * it is within window size. So we just check it. */
const U32 maxDistance = 1U << cParams->windowLog; const U32 maxDistance = 1U << cParams->windowLog;
const U32 endIndex = (U32)((size_t)(ip - base) + srcSize); const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
assert(endIndex - prefixStartIndex <= maxDistance); assert(endIndex - prefixStartIndex <= maxDistance);
(void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
@ -413,106 +501,155 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
* when translating a dict index into a local index */ * when translating a dict index into a local index */
assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
if (ms->prefetchCDictTables) {
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
PREFETCH_AREA(dictHashTable, hashTableBytes)
}
/* init */ /* init */
DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
ip += (dictAndPrefixLength == 0); ip0 += (dictAndPrefixLength == 0);
/* dictMatchState repCode checks don't currently handle repCode == 0 /* dictMatchState repCode checks don't currently handle repCode == 0
* disabling. */ * disabling. */
assert(offset_1 <= dictAndPrefixLength); assert(offset_1 <= dictAndPrefixLength);
assert(offset_2 <= dictAndPrefixLength); assert(offset_2 <= dictAndPrefixLength);
/* Main Search Loop */ /* Outer search loop */
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ assert(stepSize >= 1);
while (ip1 <= ilimit) { /* repcode check at (ip0 + 1) is safe because ip0 < ip1 */
size_t mLength; size_t mLength;
size_t const h = ZSTD_hashPtr(ip, hlog, mls); size_t hash0 = ZSTD_hashPtr(ip0, hlog, mls);
U32 const curr = (U32)(ip-base);
U32 const matchIndex = hashTable[h];
const BYTE* match = base + matchIndex;
const U32 repIndex = curr + 1 - offset_1;
const BYTE* repMatch = (repIndex < prefixStartIndex) ?
dictBase + (repIndex - dictIndexDelta) :
base + repIndex;
hashTable[h] = curr; /* update hash table */
if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ size_t const dictHashAndTag0 = ZSTD_hashPtr(ip0, dictHBits, mls);
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { U32 dictMatchIndexAndTag = dictHashTable[dictHashAndTag0 >> ZSTD_SHORT_CACHE_TAG_BITS];
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; int dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag0);
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++; U32 matchIndex = hashTable[hash0];
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); U32 curr = (U32)(ip0 - base);
} else if ( (matchIndex <= prefixStartIndex) ) { size_t step = stepSize;
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); const size_t kStepIncr = 1 << kSearchStrength;
U32 const dictMatchIndex = dictHashTable[dictHash]; const BYTE* nextStep = ip0 + kStepIncr;
const BYTE* dictMatch = dictBase + dictMatchIndex;
if (dictMatchIndex <= dictStartIndex || /* Inner search loop */
MEM_read32(dictMatch) != MEM_read32(ip)) { while (1) {
assert(stepSize >= 1); const BYTE* match = base + matchIndex;
ip += ((ip-anchor) >> kSearchStrength) + stepSize; const U32 repIndex = curr + 1 - offset_1;
continue; const BYTE* repMatch = (repIndex < prefixStartIndex) ?
} else { dictBase + (repIndex - dictIndexDelta) :
/* found a dict match */ base + repIndex;
U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta); const size_t hash1 = ZSTD_hashPtr(ip1, hlog, mls);
mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; size_t const dictHashAndTag1 = ZSTD_hashPtr(ip1, dictHBits, mls);
while (((ip>anchor) & (dictMatch>dictStart)) hashTable[hash0] = curr; /* update hash table */
&& (ip[-1] == dictMatch[-1])) {
ip--; dictMatch--; mLength++; if (((U32) ((prefixStartIndex - 1) - repIndex) >=
3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
&& (MEM_read32(repMatch) == MEM_read32(ip0 + 1))) {
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip0 + 1 + 4, repMatch + 4, iend, repMatchEnd, prefixStart) + 4;
ip0++;
ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
break;
}
if (dictTagsMatch) {
/* Found a possible dict match */
const U32 dictMatchIndex = dictMatchIndexAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
const BYTE* dictMatch = dictBase + dictMatchIndex;
if (dictMatchIndex > dictStartIndex &&
MEM_read32(dictMatch) == MEM_read32(ip0)) {
/* To replicate extDict parse behavior, we only use dict matches when the normal matchIndex is invalid */
if (matchIndex <= prefixStartIndex) {
U32 const offset = (U32) (curr - dictMatchIndex - dictIndexDelta);
mLength = ZSTD_count_2segments(ip0 + 4, dictMatch + 4, iend, dictEnd, prefixStart) + 4;
while (((ip0 > anchor) & (dictMatch > dictStart))
&& (ip0[-1] == dictMatch[-1])) {
ip0--;
dictMatch--;
mLength++;
} /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
break;
}
}
}
if (matchIndex > prefixStartIndex && MEM_read32(match) == MEM_read32(ip0)) {
/* found a regular match */
U32 const offset = (U32) (ip0 - match);
mLength = ZSTD_count(ip0 + 4, match + 4, iend) + 4;
while (((ip0 > anchor) & (match > prefixStart))
&& (ip0[-1] == match[-1])) {
ip0--;
match--;
mLength++;
} /* catch up */ } /* catch up */
offset_2 = offset_1; offset_2 = offset_1;
offset_1 = offset; offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
break;
} }
} else if (MEM_read32(match) != MEM_read32(ip)) {
/* it's not a match, and we're not going to check the dictionary */ /* Prepare for next iteration */
assert(stepSize >= 1); dictMatchIndexAndTag = dictHashTable[dictHashAndTag1 >> ZSTD_SHORT_CACHE_TAG_BITS];
ip += ((ip-anchor) >> kSearchStrength) + stepSize; dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag1);
continue; matchIndex = hashTable[hash1];
} else {
/* found a regular match */ if (ip1 >= nextStep) {
U32 const offset = (U32)(ip-match); step++;
mLength = ZSTD_count(ip+4, match+4, iend) + 4; nextStep += kStepIncr;
while (((ip>anchor) & (match>prefixStart)) }
&& (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ ip0 = ip1;
offset_2 = offset_1; ip1 = ip1 + step;
offset_1 = offset; if (ip1 > ilimit) goto _cleanup;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
} curr = (U32)(ip0 - base);
hash0 = hash1;
} /* end inner search loop */
/* match found */ /* match found */
ip += mLength; assert(mLength);
anchor = ip; ip0 += mLength;
anchor = ip0;
if (ip <= ilimit) { if (ip0 <= ilimit) {
/* Fill Table */ /* Fill Table */
assert(base+curr+2 > istart); /* check base overflow */ assert(base+curr+2 > istart); /* check base overflow */
hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */ hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
/* check immediate repcode */ /* check immediate repcode */
while (ip <= ilimit) { while (ip0 <= ilimit) {
U32 const current2 = (U32)(ip-base); U32 const current2 = (U32)(ip0-base);
U32 const repIndex2 = current2 - offset_2; U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
dictBase - dictIndexDelta + repIndex2 : dictBase - dictIndexDelta + repIndex2 :
base + repIndex2; base + repIndex2;
if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) { && (MEM_read32(repMatch2) == MEM_read32(ip0))) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = current2;
ip += repLength2; ip0 += repLength2;
anchor = ip; anchor = ip0;
continue; continue;
} }
break; break;
} }
} }
/* Prepare for next iteration */
assert(ip0 == anchor);
ip1 = ip0 + stepSize;
} }
_cleanup:
/* save reps for next block */ /* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved; rep[0] = offset_1;
rep[1] = offset_2 ? offset_2 : offsetSaved; rep[1] = offset_2;
/* Return the last literals size */ /* Return the last literals size */
return (size_t)(iend - anchor); return (size_t)(iend - anchor);
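
ZSTD_compressBlock_fast_dictMatchState_generic() now probes the dictionary through dictHBits = hashLog + ZSTD_SHORT_CACHE_TAG_BITS and ZSTD_comparePackedTags(), i.e. each dictionary hash-table entry packs a short tag next to the match index, so most false hash hits are rejected before touching dictionary memory. A rough sketch of how such packed tags can be produced and compared (illustrative; TAG_BITS = 5 and the helper names are assumptions, not the kernel's definitions):

/* Illustrative sketch, not kernel code: packing (matchIndex << TAG_BITS) | tag and
 * comparing tags first, so a mismatched bucket is dropped without a content load. */
#include <stdint.h>
#include <stdio.h>

#define TAG_BITS 5u
#define TAG_MASK ((1u << TAG_BITS) - 1u)

static uint32_t pack_index_and_tag(uint32_t matchIndex, uint64_t hashAndTag)
{
    return (matchIndex << TAG_BITS) | (uint32_t)(hashAndTag & TAG_MASK);
}

static int tags_match(uint64_t packed1, uint64_t packed2)
{
    return (packed1 & TAG_MASK) == (packed2 & TAG_MASK);
}

int main(void)
{
    uint64_t storedHashAndTag = 0x1234u;          /* hash computed when the table was filled */
    uint32_t entry = pack_index_and_tag(8190u, storedHashAndTag);
    uint64_t queryHashAndTag  = 0x5674u;          /* same low 5 bits -> worth a real compare */
    printf("tag hit: %d, index: %u\n", tags_match(entry, queryHashAndTag),
           (unsigned)(entry >> TAG_BITS));
    return 0;
}
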
@ -553,11 +690,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
U32* const hashTable = ms->hashTable; U32* const hashTable = ms->hashTable;
U32 const hlog = cParams->hashLog; U32 const hlog = cParams->hashLog;
/* support stepSize of 0 */ /* support stepSize of 0 */
U32 const stepSize = cParams->targetLength + !(cParams->targetLength); size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
const BYTE* const base = ms->window.base; const BYTE* const base = ms->window.base;
const BYTE* const dictBase = ms->window.dictBase; const BYTE* const dictBase = ms->window.dictBase;
const BYTE* const istart = (const BYTE*)src; const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart; const BYTE* anchor = istart;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
@ -570,6 +706,28 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* const iend = istart + srcSize; const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8; const BYTE* const ilimit = iend - 8;
U32 offset_1=rep[0], offset_2=rep[1]; U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved1 = 0, offsetSaved2 = 0;
const BYTE* ip0 = istart;
const BYTE* ip1;
const BYTE* ip2;
const BYTE* ip3;
U32 current0;
size_t hash0; /* hash for ip0 */
size_t hash1; /* hash for ip1 */
U32 idx; /* match idx for ip0 */
const BYTE* idxBase; /* base pointer for idx */
U32 offcode;
const BYTE* match0;
size_t mLength;
const BYTE* matchEnd = 0; /* initialize to avoid warning, assert != 0 later */
size_t step;
const BYTE* nextStep;
const size_t kStepIncr = (1 << (kSearchStrength - 1));
(void)hasStep; /* not currently specialized on whether it's accelerated */ (void)hasStep; /* not currently specialized on whether it's accelerated */
@ -579,75 +737,202 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
if (prefixStartIndex == dictStartIndex) if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize); return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);
/* Search Loop */ { U32 const curr = (U32)(ip0 - base);
while (ip < ilimit) { /* < instead of <=, because (ip+1) */ U32 const maxRep = curr - dictStartIndex;
const size_t h = ZSTD_hashPtr(ip, hlog, mls); if (offset_2 >= maxRep) offsetSaved2 = offset_2, offset_2 = 0;
const U32 matchIndex = hashTable[h]; if (offset_1 >= maxRep) offsetSaved1 = offset_1, offset_1 = 0;
const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; }
const BYTE* match = matchBase + matchIndex;
const U32 curr = (U32)(ip-base);
const U32 repIndex = curr + 1 - offset_1;
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
hashTable[h] = curr; /* update hash table */
DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ /* start each op */
& (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */ _start: /* Requires: ip0 */
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; step = stepSize;
size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; nextStep = ip0 + kStepIncr;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength); /* calculate positions, ip0 - anchor == 0, so we skip step calc */
ip += rLength; ip1 = ip0 + 1;
anchor = ip; ip2 = ip0 + step;
} else { ip3 = ip2 + 1;
if ( (matchIndex < dictStartIndex) ||
(MEM_read32(match) != MEM_read32(ip)) ) { if (ip3 >= ilimit) {
assert(stepSize >= 1); goto _cleanup;
ip += ((ip-anchor) >> kSearchStrength) + stepSize; }
continue;
hash0 = ZSTD_hashPtr(ip0, hlog, mls);
hash1 = ZSTD_hashPtr(ip1, hlog, mls);
idx = hashTable[hash0];
idxBase = idx < prefixStartIndex ? dictBase : base;
do {
{ /* load repcode match for ip[2] */
U32 const current2 = (U32)(ip2 - base);
U32 const repIndex = current2 - offset_1;
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
U32 rval;
if ( ((U32)(prefixStartIndex - repIndex) >= 4) /* intentional underflow */
& (offset_1 > 0) ) {
rval = MEM_read32(repBase + repIndex);
} else {
rval = MEM_read32(ip2) ^ 1; /* guaranteed to not match. */
} }
{ const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; /* write back hash table entry */
U32 const offset = curr - matchIndex; current0 = (U32)(ip0 - base);
size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; hashTable[hash0] = current0;
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset_2 = offset_1; offset_1 = offset; /* update offset history */ /* check repcode at ip[2] */
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); if (MEM_read32(ip2) == rval) {
ip += mLength; ip0 = ip2;
anchor = ip; match0 = repBase + repIndex;
matchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
assert((match0 != prefixStart) & (match0 != dictStart));
mLength = ip0[-1] == match0[-1];
ip0 -= mLength;
match0 -= mLength;
offcode = REPCODE1_TO_OFFBASE;
mLength += 4;
goto _match;
} } } }
if (ip <= ilimit) { { /* load match for ip[0] */
/* Fill Table */ U32 const mval = idx >= dictStartIndex ?
hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; MEM_read32(idxBase + idx) :
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); MEM_read32(ip0) ^ 1; /* guaranteed not to match */
/* check immediate repcode */
while (ip <= ilimit) { /* check match at ip[0] */
U32 const current2 = (U32)(ip-base); if (MEM_read32(ip0) == mval) {
U32 const repIndex2 = current2 - offset_2; /* found a match! */
const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; goto _offset;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex)) /* intentional overflow */ } }
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; /* lookup ip[1] */
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; idx = hashTable[hash1];
{ U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ idxBase = idx < prefixStartIndex ? dictBase : base;
ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2);
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; /* hash ip[2] */
ip += repLength2; hash0 = hash1;
anchor = ip; hash1 = ZSTD_hashPtr(ip2, hlog, mls);
continue;
} /* advance to next positions */
break; ip0 = ip1;
} } } ip1 = ip2;
ip2 = ip3;
/* write back hash table entry */
current0 = (U32)(ip0 - base);
hashTable[hash0] = current0;
{ /* load match for ip[0] */
U32 const mval = idx >= dictStartIndex ?
MEM_read32(idxBase + idx) :
MEM_read32(ip0) ^ 1; /* guaranteed not to match */
/* check match at ip[0] */
if (MEM_read32(ip0) == mval) {
/* found a match! */
goto _offset;
} }
/* lookup ip[1] */
idx = hashTable[hash1];
idxBase = idx < prefixStartIndex ? dictBase : base;
/* hash ip[2] */
hash0 = hash1;
hash1 = ZSTD_hashPtr(ip2, hlog, mls);
/* advance to next positions */
ip0 = ip1;
ip1 = ip2;
ip2 = ip0 + step;
ip3 = ip1 + step;
/* calculate step */
if (ip2 >= nextStep) {
step++;
PREFETCH_L1(ip1 + 64);
PREFETCH_L1(ip1 + 128);
nextStep += kStepIncr;
}
} while (ip3 < ilimit);
_cleanup:
/* Note that there are probably still a couple positions we could search.
* However, it seems to be a meaningful performance hit to try to search
* them. So let's not. */
/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
* rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
/* save reps for next block */ /* save reps for next block */
rep[0] = offset_1; rep[0] = offset_1 ? offset_1 : offsetSaved1;
rep[1] = offset_2; rep[1] = offset_2 ? offset_2 : offsetSaved2;
/* Return the last literals size */ /* Return the last literals size */
return (size_t)(iend - anchor); return (size_t)(iend - anchor);
_offset: /* Requires: ip0, idx, idxBase */
/* Compute the offset code. */
{ U32 const offset = current0 - idx;
const BYTE* const lowMatchPtr = idx < prefixStartIndex ? dictStart : prefixStart;
matchEnd = idx < prefixStartIndex ? dictEnd : iend;
match0 = idxBase + idx;
offset_2 = offset_1;
offset_1 = offset;
offcode = OFFSET_TO_OFFBASE(offset);
mLength = 4;
/* Count the backwards match length. */
while (((ip0>anchor) & (match0>lowMatchPtr)) && (ip0[-1] == match0[-1])) {
ip0--;
match0--;
mLength++;
} }
_match: /* Requires: ip0, match0, offcode, matchEnd */
/* Count the forward length. */
assert(matchEnd != 0);
mLength += ZSTD_count_2segments(ip0 + mLength, match0 + mLength, iend, matchEnd, prefixStart);
ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength);
ip0 += mLength;
anchor = ip0;
/* write next hash table entry */
if (ip1 < ip0) {
hashTable[hash1] = (U32)(ip1 - base);
}
/* Fill table and check for immediate repcode. */
if (ip0 <= ilimit) {
/* Fill Table */
assert(base+current0+2 > istart); /* check base overflow */
hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
while (ip0 <= ilimit) {
U32 const repIndex2 = (U32)(ip0-base) - offset_2;
const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 > 0)) /* intentional underflow */
&& (MEM_read32(repMatch2) == MEM_read32(ip0)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
{ U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
ip0 += repLength2;
anchor = ip0;
continue;
}
break;
} }
goto _start;
} }
ZSTD_GEN_FAST_FN(extDict, 4, 0) ZSTD_GEN_FAST_FN(extDict, 4, 0)
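
In the rewritten extDict loop, out-of-range candidates are handled without extra branches: instead of skipping the comparison, the code loads MEM_read32(ip) ^ 1, a value guaranteed to differ from the input bytes, so the single equality test that follows simply fails. A standalone sketch of the idiom (illustrative; MEM_read32 is approximated with memcpy):

/* Illustrative sketch, not kernel code: branch-free handling of invalid candidates. */
#include <stdint.h>
#include <string.h>
#include <stdio.h>

static uint32_t read32(const void* p) { uint32_t v; memcpy(&v, p, sizeof v); return v; }

/* Return the 4 bytes at `candidate` if `valid`, otherwise a value that cannot
 * equal read32(ip); the caller then uses one unconditional compare. */
static uint32_t load_match_or_impossible(const uint8_t* ip, const uint8_t* candidate, int valid)
{
    return valid ? read32(candidate) : read32(ip) ^ 1;
}

int main(void)
{
    const uint8_t ip[8]   = "abcdefg";
    const uint8_t cand[8] = "abcdxyz";
    printf("valid hit:   %d\n", read32(ip) == load_match_or_impossible(ip, cand, 1)); /* 1 */
    printf("invalid hit: %d\n", read32(ip) == load_match_or_impossible(ip, cand, 0)); /* always 0 */
    return 0;
}
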
@ -660,6 +945,7 @@ size_t ZSTD_compressBlock_fast_extDict(
void const* src, size_t srcSize) void const* src, size_t srcSize)
{ {
U32 const mls = ms->cParams.minMatch; U32 const mls = ms->cParams.minMatch;
assert(ms->dictMatchState == NULL);
switch(mls) switch(mls)
{ {
default: /* includes case 3 */ default: /* includes case 3 */


@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -16,7 +17,8 @@
#include "zstd_compress_internal.h" #include "zstd_compress_internal.h"
void ZSTD_fillHashTable(ZSTD_matchState_t* ms, void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm); void const* end, ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp);
size_t ZSTD_compressBlock_fast( size_t ZSTD_compressBlock_fast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);

File diff suppressed because it is too large


@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -22,6 +23,8 @@
*/ */
#define ZSTD_LAZY_DDSS_BUCKET_LOG 2 #define ZSTD_LAZY_DDSS_BUCKET_LOG 2
#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip); void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
@ -113,7 +116,7 @@ size_t ZSTD_compressBlock_lazy2_extDict_row(
size_t ZSTD_compressBlock_btlazy2_extDict( size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);
#endif /* ZSTD_LAZY_H */ #endif /* ZSTD_LAZY_H */


@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -242,11 +243,11 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
switch(ms->cParams.strategy) switch(ms->cParams.strategy)
{ {
case ZSTD_fast: case ZSTD_fast:
ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast); ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
break; break;
case ZSTD_dfast: case ZSTD_dfast:
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast); ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
break; break;
case ZSTD_greedy: case ZSTD_greedy:
@ -549,7 +550,7 @@ size_t ZSTD_ldm_generateSequences(
* the window through early invalidation. * the window through early invalidation.
* TODO: * Test the chunk size. * TODO: * Test the chunk size.
* * Try invalidation after the sequence generation and test the * * Try invalidation after the sequence generation and test the
* the offset against maxDist directly. * offset against maxDist directly.
* *
* NOTE: Because of dictionaries + sequence splitting we MUST make sure * NOTE: Because of dictionaries + sequence splitting we MUST make sure
* that any offset used is valid at the END of the sequence, since it may * that any offset used is valid at the END of the sequence, since it may
@ -711,7 +712,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
rep[0] = sequence.offset; rep[0] = sequence.offset;
/* Store the sequence */ /* Store the sequence */
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
STORE_OFFSET(sequence.offset), OFFSET_TO_OFFBASE(sequence.offset),
sequence.matchLength); sequence.matchLength);
ip += sequence.matchLength; ip += sequence.matchLength;
} }


@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the


@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the


@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -16,7 +17,7 @@
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
#define ZSTD_MAX_PRICE (1<<30) #define ZSTD_MAX_PRICE (1<<30)
#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ #define ZSTD_PREDEF_THRESHOLD 8 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
/*-************************************* /*-*************************************
@ -26,27 +27,35 @@
#if 0 /* approximation at bit level (for tests) */ #if 0 /* approximation at bit level (for tests) */
# define BITCOST_ACCURACY 0 # define BITCOST_ACCURACY 0
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat)) # define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
#elif 0 /* fractional bit accuracy (for tests) */ #elif 0 /* fractional bit accuracy (for tests) */
# define BITCOST_ACCURACY 8 # define BITCOST_ACCURACY 8
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat)) # define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
#else /* opt==approx, ultra==accurate */ #else /* opt==approx, ultra==accurate */
# define BITCOST_ACCURACY 8 # define BITCOST_ACCURACY 8
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat)) # define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
#endif #endif
/* ZSTD_bitWeight() :
* provide estimated "cost" of a stat in full bits only */
MEM_STATIC U32 ZSTD_bitWeight(U32 stat) MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
{ {
return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER); return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
} }
/* ZSTD_fracWeight() :
* provide fractional-bit "cost" of a stat,
* using linear interpolation approximation */
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat) MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
{ {
U32 const stat = rawStat + 1; U32 const stat = rawStat + 1;
U32 const hb = ZSTD_highbit32(stat); U32 const hb = ZSTD_highbit32(stat);
U32 const BWeight = hb * BITCOST_MULTIPLIER; U32 const BWeight = hb * BITCOST_MULTIPLIER;
/* Fweight was meant for "Fractional weight"
* but it's effectively a value between 1 and 2
* using fixed point arithmetic */
U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb; U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
U32 const weight = BWeight + FWeight; U32 const weight = BWeight + FWeight;
assert(hb + BITCOST_ACCURACY < 31); assert(hb + BITCOST_ACCURACY < 31);
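
ZSTD_bitWeight()/ZSTD_fracWeight() above express symbol costs in fixed point with BITCOST_ACCURACY fractional bits; the FWeight term linearly interpolates between consecutive powers of two, which is the "value between 1 and 2" the new comment mentions. A small sketch of the same arithmetic next to a floating-point log2 reference (illustrative only, assuming BITCOST_ACCURACY == 8 as in this file):

/* Illustrative sketch, not kernel code: fixed-point fractional bit weights. */
#include <math.h>
#include <stdio.h>

#define ACCURACY   8u
#define MULTIPLIER (1u << ACCURACY)

static unsigned highbit32(unsigned v) { unsigned n = 0; while (v >>= 1) n++; return n; }

static unsigned frac_weight(unsigned rawStat)
{
    unsigned const stat = rawStat + 1;
    unsigned const hb   = highbit32(stat);
    unsigned const bw   = hb * MULTIPLIER;          /* whole bits */
    unsigned const fw   = (stat << ACCURACY) >> hb; /* in [256, 512): the interpolated part */
    return bw + fw;
}

int main(void)
{
    unsigned stats[] = { 0, 1, 5, 100, 1000 };
    for (unsigned i = 0; i < sizeof stats / sizeof stats[0]; i++) {
        unsigned s = stats[i];
        printf("stat=%4u  fracWeight=%5u  ~256*(log2(stat+1)+1)=%8.1f\n",
               s, frac_weight(s), 256.0 * (log2((double)s + 1.0) + 1.0));
    }
    return 0;
}
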
@ -57,7 +66,7 @@ MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
/* debugging function, /* debugging function,
* @return price in bytes as fractional value * @return price in bytes as fractional value
* for debug messages only */ * for debug messages only */
MEM_STATIC double ZSTD_fCost(U32 price) MEM_STATIC double ZSTD_fCost(int price)
{ {
return (double)price / (BITCOST_MULTIPLIER*8); return (double)price / (BITCOST_MULTIPLIER*8);
} }
@ -88,20 +97,26 @@ static U32 sum_u32(const unsigned table[], size_t nbElts)
return total; return total;
} }
static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift) typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
static U32
ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
{ {
U32 s, sum=0; U32 s, sum=0;
DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift); DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
(unsigned)lastEltIndex+1, (unsigned)shift );
assert(shift < 30); assert(shift < 30);
for (s=0; s<lastEltIndex+1; s++) { for (s=0; s<lastEltIndex+1; s++) {
table[s] = 1 + (table[s] >> shift); unsigned const base = base1 ? 1 : (table[s]>0);
sum += table[s]; unsigned const newStat = base + (table[s] >> shift);
sum += newStat;
table[s] = newStat;
} }
return sum; return sum;
} }
/* ZSTD_scaleStats() : /* ZSTD_scaleStats() :
* reduce all elements in table is sum too large * reduce all elt frequencies in table if sum too large
* return the resulting sum of elements */ * return the resulting sum of elements */
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget) static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
{ {
@ -110,7 +125,7 @@ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget); DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
assert(logTarget < 30); assert(logTarget < 30);
if (factor <= 1) return prevsum; if (factor <= 1) return prevsum;
return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor)); return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
} }
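
ZSTD_downscaleStats() now takes a base_directive_e, so callers choose whether rescaled frequencies keep a floor of 1 (base_1guaranteed) or may stay at 0 when they already were 0 (base_0possible); ZSTD_scaleStats() derives the shift from how far the current sum exceeds the target. A compact sketch of the downscaling step (illustrative; names are simplified, not the kernel's):

/* Illustrative sketch, not kernel code: dividing frequencies by 2^shift with an optional floor. */
#include <stdio.h>

enum base_directive { BASE_0POSSIBLE = 0, BASE_1GUARANTEED = 1 };

static unsigned downscale(unsigned* table, unsigned nbElts, unsigned shift, enum base_directive d)
{
    unsigned sum = 0;
    for (unsigned s = 0; s < nbElts; s++) {
        unsigned const base    = (d == BASE_1GUARANTEED) ? 1 : (table[s] > 0);
        unsigned const newStat = base + (table[s] >> shift);
        sum += newStat;
        table[s] = newStat;
    }
    return sum;
}

int main(void)
{
    unsigned freq[4] = { 0, 3, 40, 1000 };
    unsigned sum = downscale(freq, 4, 4, BASE_0POSSIBLE);
    /* -> 0 stays 0, small counts collapse toward 1, large counts shrink by ~16x */
    printf("sum=%u freqs=%u,%u,%u,%u\n", sum, freq[0], freq[1], freq[2], freq[3]);
    return 0;
}
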
/* ZSTD_rescaleFreqs() : /* ZSTD_rescaleFreqs() :
@ -129,18 +144,22 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
optPtr->priceType = zop_dynamic; optPtr->priceType = zop_dynamic;
if (optPtr->litLengthSum == 0) { /* first block : init */ if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */
if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */
DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef"); /* heuristic: use pre-defined stats for too small inputs */
if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
optPtr->priceType = zop_predef; optPtr->priceType = zop_predef;
} }
assert(optPtr->symbolCosts != NULL); assert(optPtr->symbolCosts != NULL);
if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
/* huffman table presumed generated by dictionary */
/* huffman stats covering the full value set : table presumed generated by dictionary */
optPtr->priceType = zop_dynamic; optPtr->priceType = zop_dynamic;
if (compressedLiterals) { if (compressedLiterals) {
/* generate literals statistics from huffman table */
unsigned lit; unsigned lit;
assert(optPtr->litFreq != NULL); assert(optPtr->litFreq != NULL);
optPtr->litSum = 0; optPtr->litSum = 0;
@ -188,13 +207,14 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
optPtr->offCodeSum += optPtr->offCodeFreq[of]; optPtr->offCodeSum += optPtr->offCodeFreq[of];
} } } }
} else { /* not a dictionary */ } else { /* first block, no dictionary */
assert(optPtr->litFreq != NULL); assert(optPtr->litFreq != NULL);
if (compressedLiterals) { if (compressedLiterals) {
/* base initial cost of literals on direct frequency within src */
unsigned lit = MaxLit; unsigned lit = MaxLit;
HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8); optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
} }
{ unsigned const baseLLfreqs[MaxLL+1] = { { unsigned const baseLLfreqs[MaxLL+1] = {
@ -224,10 +244,9 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1); optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
} }
} }
} else { /* new block : re-use previous statistics, scaled down */ } else { /* new block : scale down accumulated statistics */
if (compressedLiterals) if (compressedLiterals)
optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12); optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
@ -255,11 +274,14 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
/* dynamic statistics */ /* dynamic statistics */
{ U32 price = litLength * optPtr->litSumBasePrice; { U32 price = optPtr->litSumBasePrice * litLength;
U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
U32 u; U32 u;
assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
for (u=0; u < litLength; u++) { for (u=0; u < litLength; u++) {
assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */ U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel); if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
price -= litPrice;
} }
return price; return price;
} }
@ -272,10 +294,11 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
assert(litLength <= ZSTD_BLOCKSIZE_MAX); assert(litLength <= ZSTD_BLOCKSIZE_MAX);
if (optPtr->priceType == zop_predef) if (optPtr->priceType == zop_predef)
return WEIGHT(litLength, optLevel); return WEIGHT(litLength, optLevel);
/* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
* because it isn't representable in the zstd format. So instead just /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
* call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block * because it isn't representable in the zstd format.
* would be all literals. * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
* In such a case, the block would be all literals.
*/ */
if (litLength == ZSTD_BLOCKSIZE_MAX) if (litLength == ZSTD_BLOCKSIZE_MAX)
return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel); return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
@ -289,24 +312,25 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
} }
/* ZSTD_getMatchPrice() : /* ZSTD_getMatchPrice() :
* Provides the cost of the match part (offset + matchLength) of a sequence * Provides the cost of the match part (offset + matchLength) of a sequence.
* Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
* @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2 * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
* @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
*/ */
FORCE_INLINE_TEMPLATE U32 FORCE_INLINE_TEMPLATE U32
ZSTD_getMatchPrice(U32 const offcode, ZSTD_getMatchPrice(U32 const offBase,
U32 const matchLength, U32 const matchLength,
const optState_t* const optPtr, const optState_t* const optPtr,
int const optLevel) int const optLevel)
{ {
U32 price; U32 price;
U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode)); U32 const offCode = ZSTD_highbit32(offBase);
U32 const mlBase = matchLength - MINMATCH; U32 const mlBase = matchLength - MINMATCH;
assert(matchLength >= MINMATCH); assert(matchLength >= MINMATCH);
if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */
return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER); return WEIGHT(mlBase, optLevel)
+ ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
/* dynamic statistics */ /* dynamic statistics */
price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel)); price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
@ -325,10 +349,10 @@ ZSTD_getMatchPrice(U32 const offcode,
} }
/* ZSTD_updateStats() : /* ZSTD_updateStats() :
* assumption : literals + litLengtn <= iend */ * assumption : literals + litLength <= iend */
static void ZSTD_updateStats(optState_t* const optPtr, static void ZSTD_updateStats(optState_t* const optPtr,
U32 litLength, const BYTE* literals, U32 litLength, const BYTE* literals,
U32 offsetCode, U32 matchLength) U32 offBase, U32 matchLength)
{ {
/* literals */ /* literals */
if (ZSTD_compressedLiterals(optPtr)) { if (ZSTD_compressedLiterals(optPtr)) {
@ -344,8 +368,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
optPtr->litLengthSum++; optPtr->litLengthSum++;
} }
/* offset code : expected to follow storeSeq() numeric representation */ /* offset code : follows storeSeq() numeric representation */
{ U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode)); { U32 const offCode = ZSTD_highbit32(offBase);
assert(offCode <= MaxOff); assert(offCode <= MaxOff);
optPtr->offCodeFreq[offCode]++; optPtr->offCodeFreq[offCode]++;
optPtr->offCodeSum++; optPtr->offCodeSum++;
@ -552,16 +576,17 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict); ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
} }
FORCE_INLINE_TEMPLATE FORCE_INLINE_TEMPLATE U32
U32 ZSTD_insertBtAndGetAllMatches ( ZSTD_insertBtAndGetAllMatches (
ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
ZSTD_matchState_t* ms, ZSTD_matchState_t* ms,
U32* nextToUpdate3, U32* nextToUpdate3,
const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, const BYTE* const ip, const BYTE* const iLimit,
const U32 rep[ZSTD_REP_NUM], const ZSTD_dictMode_e dictMode,
U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ const U32 rep[ZSTD_REP_NUM],
const U32 lengthToBeat, const U32 ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
U32 const mls /* template */) const U32 lengthToBeat,
const U32 mls /* template */)
{ {
const ZSTD_compressionParameters* const cParams = &ms->cParams; const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
@ -644,7 +669,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
repCode, ll0, repOffset, repLen); repCode, ll0, repOffset, repLen);
bestLength = repLen; bestLength = repLen;
matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1); /* expect value between 1 and 3 */ matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1); /* expect value between 1 and 3 */
matches[mnum].len = (U32)repLen; matches[mnum].len = (U32)repLen;
mnum++; mnum++;
if ( (repLen > sufficient_len) if ( (repLen > sufficient_len)
@ -673,7 +698,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
bestLength = mlen; bestLength = mlen;
assert(curr > matchIndex3); assert(curr > matchIndex3);
assert(mnum==0); /* no prior solution */ assert(mnum==0); /* no prior solution */
matches[0].off = STORE_OFFSET(curr - matchIndex3); matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
matches[0].len = (U32)mlen; matches[0].len = (U32)mlen;
mnum = 1; mnum = 1;
if ( (mlen > sufficient_len) | if ( (mlen > sufficient_len) |
@ -706,13 +731,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
} }
if (matchLength > bestLength) { if (matchLength > bestLength) {
DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)", DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
(U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex)); (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
assert(matchEndIdx > matchIndex); assert(matchEndIdx > matchIndex);
if (matchLength > matchEndIdx - matchIndex) if (matchLength > matchEndIdx - matchIndex)
matchEndIdx = matchIndex + (U32)matchLength; matchEndIdx = matchIndex + (U32)matchLength;
bestLength = matchLength; bestLength = matchLength;
matches[mnum].off = STORE_OFFSET(curr - matchIndex); matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
matches[mnum].len = (U32)matchLength; matches[mnum].len = (U32)matchLength;
mnum++; mnum++;
if ( (matchLength > ZSTD_OPT_NUM) if ( (matchLength > ZSTD_OPT_NUM)
@ -754,12 +779,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
if (matchLength > bestLength) { if (matchLength > bestLength) {
matchIndex = dictMatchIndex + dmsIndexDelta; matchIndex = dictMatchIndex + dmsIndexDelta;
DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)", DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
(U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex)); (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
if (matchLength > matchEndIdx - matchIndex) if (matchLength > matchEndIdx - matchIndex)
matchEndIdx = matchIndex + (U32)matchLength; matchEndIdx = matchIndex + (U32)matchLength;
bestLength = matchLength; bestLength = matchLength;
matches[mnum].off = STORE_OFFSET(curr - matchIndex); matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
matches[mnum].len = (U32)matchLength; matches[mnum].len = (U32)matchLength;
mnum++; mnum++;
if ( (matchLength > ZSTD_OPT_NUM) if ( (matchLength > ZSTD_OPT_NUM)
@ -960,7 +985,7 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
const ZSTD_optLdm_t* optLdm, U32 currPosInBlock) const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
{ {
U32 const posDiff = currPosInBlock - optLdm->startPosInBlock; U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
/* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */ /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff; U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
/* Ensure that current block position is not outside of the match */ /* Ensure that current block position is not outside of the match */
@ -971,11 +996,11 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
} }
if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) { if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
U32 const candidateOffCode = STORE_OFFSET(optLdm->offset); U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u", DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
candidateOffCode, candidateMatchLength, currPosInBlock); candidateOffBase, candidateMatchLength, currPosInBlock);
matches[*nbMatches].len = candidateMatchLength; matches[*nbMatches].len = candidateMatchLength;
matches[*nbMatches].off = candidateOffCode; matches[*nbMatches].off = candidateOffBase;
(*nbMatches)++; (*nbMatches)++;
} }
} }
@ -1062,6 +1087,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
ZSTD_optimal_t lastSequence; ZSTD_optimal_t lastSequence;
ZSTD_optLdm_t optLdm; ZSTD_optLdm_t optLdm;
ZSTD_memset(&lastSequence, 0, sizeof(ZSTD_optimal_t));
optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore; optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0; optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip)); ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
@ -1098,14 +1125,14 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
/* large match -> immediate encoding */ /* large match -> immediate encoding */
{ U32 const maxML = matches[nbMatches-1].len; { U32 const maxML = matches[nbMatches-1].len;
U32 const maxOffcode = matches[nbMatches-1].off; U32 const maxOffBase = matches[nbMatches-1].off;
DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series", DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart)); nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
if (maxML > sufficient_len) { if (maxML > sufficient_len) {
lastSequence.litlen = litlen; lastSequence.litlen = litlen;
lastSequence.mlen = maxML; lastSequence.mlen = maxML;
lastSequence.off = maxOffcode; lastSequence.off = maxOffBase;
DEBUGLOG(6, "large match (%u>%u), immediate encoding", DEBUGLOG(6, "large match (%u>%u), immediate encoding",
maxML, sufficient_len); maxML, sufficient_len);
cur = 0; cur = 0;
@ -1122,15 +1149,15 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */ opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
} }
for (matchNb = 0; matchNb < nbMatches; matchNb++) { for (matchNb = 0; matchNb < nbMatches; matchNb++) {
U32 const offcode = matches[matchNb].off; U32 const offBase = matches[matchNb].off;
U32 const end = matches[matchNb].len; U32 const end = matches[matchNb].len;
for ( ; pos <= end ; pos++ ) { for ( ; pos <= end ; pos++ ) {
U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel); U32 const matchPrice = ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
U32 const sequencePrice = literalsPrice + matchPrice; U32 const sequencePrice = literalsPrice + matchPrice;
DEBUGLOG(7, "rPos:%u => set initial price : %.2f", DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
pos, ZSTD_fCost(sequencePrice)); pos, ZSTD_fCost((int)sequencePrice));
opt[pos].mlen = pos; opt[pos].mlen = pos;
opt[pos].off = offcode; opt[pos].off = offBase;
opt[pos].litlen = litlen; opt[pos].litlen = litlen;
opt[pos].price = (int)sequencePrice; opt[pos].price = (int)sequencePrice;
} } } }
@ -1230,7 +1257,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch; U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
U32 mlen; U32 mlen;
DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u", DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
matchNb, matches[matchNb].off, lastML, litlen); matchNb, matches[matchNb].off, lastML, litlen);
for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
@ -1296,7 +1323,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
for (storePos=storeStart; storePos <= storeEnd; storePos++) { for (storePos=storeStart; storePos <= storeEnd; storePos++) {
U32 const llen = opt[storePos].litlen; U32 const llen = opt[storePos].litlen;
U32 const mlen = opt[storePos].mlen; U32 const mlen = opt[storePos].mlen;
U32 const offCode = opt[storePos].off; U32 const offBase = opt[storePos].off;
U32 const advance = llen + mlen; U32 const advance = llen + mlen;
DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u", DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
anchor - istart, (unsigned)llen, (unsigned)mlen); anchor - istart, (unsigned)llen, (unsigned)mlen);
@ -1308,8 +1335,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
} }
assert(anchor + llen <= iend); assert(anchor + llen <= iend);
ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen); ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
anchor += advance; anchor += advance;
ip = anchor; ip = anchor;
} } } }
@ -1349,7 +1376,7 @@ size_t ZSTD_compressBlock_btopt(
/* ZSTD_initStats_ultra(): /* ZSTD_initStats_ultra():
* make a first compression pass, just to seed stats with more accurate starting values. * make a first compression pass, just to seed stats with more accurate starting values.
* only works on first block, with no dictionary and no ldm. * only works on first block, with no dictionary and no ldm.
* this function cannot error, hence its contract must be respected. * this function cannot error out, its narrow contract must be respected.
*/ */
static void static void
ZSTD_initStats_ultra(ZSTD_matchState_t* ms, ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
@ -1368,7 +1395,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/ ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
/* invalidate first scan from history */ /* invalidate first scan from history, only keep entropy stats */
ZSTD_resetSeqStore(seqStore); ZSTD_resetSeqStore(seqStore);
ms->window.base -= srcSize; ms->window.base -= srcSize;
ms->window.dictLimit += (U32)srcSize; ms->window.dictLimit += (U32)srcSize;
@ -1392,20 +1419,20 @@ size_t ZSTD_compressBlock_btultra2(
U32 const curr = (U32)((const BYTE*)src - ms->window.base); U32 const curr = (U32)((const BYTE*)src - ms->window.base);
DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
/* 2-pass strategy: /* 2-passes strategy:
* this strategy makes a first pass over first block to collect statistics * this strategy makes a first pass over first block to collect statistics
* and seed next round's statistics with it. * in order to seed next round's statistics with it.
* After 1st pass, function forgets everything, and starts a new block. * After 1st pass, function forgets history, and starts a new block.
* Consequently, this can only work if no data has been previously loaded in tables, * Consequently, this can only work if no data has been previously loaded in tables,
* aka, no dictionary, no prefix, no ldm preprocessing. * aka, no dictionary, no prefix, no ldm preprocessing.
* The compression ratio gain is generally small (~0.5% on first block), * The compression ratio gain is generally small (~0.5% on first block),
* the cost is 2x cpu time on first block. */ ** the cost is 2x cpu time on first block. */
assert(srcSize <= ZSTD_BLOCKSIZE_MAX); assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
if ( (ms->opt.litLengthSum==0) /* first block */ if ( (ms->opt.litLengthSum==0) /* first block */
&& (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
&& (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
&& (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
&& (srcSize > ZSTD_PREDEF_THRESHOLD) && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
) { ) {
ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
} }


@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the

File diff suppressed because it is too large

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -14,12 +15,12 @@
/*-******************************************************* /*-*******************************************************
* Dependencies * Dependencies
*********************************************************/ *********************************************************/
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
#include "../common/cpu.h" /* bmi2 */ #include "../common/cpu.h" /* bmi2 */
#include "../common/mem.h" /* low level memory routines */ #include "../common/mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY #define FSE_STATIC_LINKING_ONLY
#include "../common/fse.h" #include "../common/fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h" #include "../common/huf.h"
#include "zstd_decompress_internal.h" #include "zstd_decompress_internal.h"
#include "zstd_ddict.h" #include "zstd_ddict.h"
@ -131,7 +132,7 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
ZSTD_memcpy(internalBuffer, dict, dictSize); ZSTD_memcpy(internalBuffer, dict, dictSize);
} }
ddict->dictSize = dictSize; ddict->dictSize = dictSize;
ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */
/* parse dictionary content */ /* parse dictionary content */
FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
@ -237,5 +238,5 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
{ {
if (ddict==NULL) return 0; if (ddict==NULL) return 0;
return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); return ddict->dictID;
} }

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -52,17 +53,18 @@
/*-******************************************************* /*-*******************************************************
* Dependencies * Dependencies
*********************************************************/ *********************************************************/
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
#include "../common/mem.h" /* low level memory routines */ #include "../common/mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY #define FSE_STATIC_LINKING_ONLY
#include "../common/fse.h" #include "../common/fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h" #include "../common/huf.h"
#include <linux/xxhash.h> /* xxh64_reset, xxh64_update, xxh64_digest, XXH64 */ #include <linux/xxhash.h> /* xxh64_reset, xxh64_update, xxh64_digest, XXH64 */
#include "../common/zstd_internal.h" /* blockProperties_t */ #include "../common/zstd_internal.h" /* blockProperties_t */
#include "zstd_decompress_internal.h" /* ZSTD_DCtx */ #include "zstd_decompress_internal.h" /* ZSTD_DCtx */
#include "zstd_ddict.h" /* ZSTD_DDictDictContent */ #include "zstd_ddict.h" /* ZSTD_DDictDictContent */
#include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */ #include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */
#include "../common/bits.h" /* ZSTD_highbit32 */
@ -72,11 +74,11 @@
*************************************/ *************************************/
#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4 #define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4
#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float. #define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float.
* Currently, that means a 0.75 load factor. * Currently, that means a 0.75 load factor.
* So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded
* the load factor of the ddict hash set. * the load factor of the ddict hash set.
*/ */
#define DDICT_HASHSET_TABLE_BASE_SIZE 64 #define DDICT_HASHSET_TABLE_BASE_SIZE 64
#define DDICT_HASHSET_RESIZE_FACTOR 2 #define DDICT_HASHSET_RESIZE_FACTOR 2
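Expressed as a hedged sketch (the helper below is hypothetical; only the two multiplier macros come from this hunk), the float-free 0.75 load-factor test boils down to an integer cross-multiplication:

/* nonzero once count/size exceeds SIZE_MULT/COUNT_MULT = 3/4,
 * using integer arithmetic only */
static int ddictHashSet_overLoadFactor(size_t count, size_t size)
{
    return count * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT
         > size  * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT;
}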
@ -237,6 +239,7 @@ static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
dctx->outBufferMode = ZSTD_bm_buffered; dctx->outBufferMode = ZSTD_bm_buffered;
dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict; dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
dctx->disableHufAsm = 0;
} }
static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
@ -421,16 +424,40 @@ size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
* note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
* @return : 0, `zfhPtr` is correctly filled, * @return : 0, `zfhPtr` is correctly filled,
* >0, `srcSize` is too small, value is wanted `srcSize` amount, * >0, `srcSize` is too small, value is wanted `srcSize` amount,
* or an error code, which can be tested using ZSTD_isError() */ ** or an error code, which can be tested using ZSTD_isError() */
size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
{ {
const BYTE* ip = (const BYTE*)src; const BYTE* ip = (const BYTE*)src;
size_t const minInputSize = ZSTD_startingInputLength(format); size_t const minInputSize = ZSTD_startingInputLength(format);
ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ DEBUGLOG(5, "ZSTD_getFrameHeader_advanced: minInputSize = %zu, srcSize = %zu", minInputSize, srcSize);
if (srcSize < minInputSize) return minInputSize;
RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter");
if (srcSize > 0) {
/* note : technically could be considered an assert(), since it's an invalid entry */
RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter : src==NULL, but srcSize>0");
}
if (srcSize < minInputSize) {
if (srcSize > 0 && format != ZSTD_f_zstd1_magicless) {
/* when receiving less than @minInputSize bytes,
* control these bytes at least correspond to a supported magic number
* in order to error out early if they don't.
**/
size_t const toCopy = MIN(4, srcSize);
unsigned char hbuf[4]; MEM_writeLE32(hbuf, ZSTD_MAGICNUMBER);
assert(src != NULL);
ZSTD_memcpy(hbuf, src, toCopy);
if ( MEM_readLE32(hbuf) != ZSTD_MAGICNUMBER ) {
/* not a zstd frame : let's check if it's a skippable frame */
MEM_writeLE32(hbuf, ZSTD_MAGIC_SKIPPABLE_START);
ZSTD_memcpy(hbuf, src, toCopy);
if ((MEM_readLE32(hbuf) & ZSTD_MAGIC_SKIPPABLE_MASK) != ZSTD_MAGIC_SKIPPABLE_START) {
RETURN_ERROR(prefix_unknown,
"first bytes don't correspond to any supported magic number");
} } }
return minInputSize;
}
ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzers may not understand that zfhPtr will be read only if return value is zero, since they are 2 different signals */
if ( (format != ZSTD_f_zstd1_magicless) if ( (format != ZSTD_f_zstd1_magicless)
&& (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
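The header-probing contract above (0 = header parsed, >0 = bytes still needed, otherwise an error) is easiest to see from the caller's side. A hedged usage sketch, where buf and filled are purely illustrative names:

ZSTD_frameHeader zfh;
size_t const need = ZSTD_getFrameHeader(&zfh, buf, filled);
if (ZSTD_isError(need)) {
    /* with this change, even a short prefix that matches no supported
     * magic number fails here instead of asking for more input */
} else if (need > 0) {
    /* incomplete header: accumulate input until filled >= need, then retry */
} else {
    /* zfh is populated: frameContentSize, windowSize, dictID, ... */
}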
@ -540,49 +567,52 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
frameParameter_unsupported, ""); frameParameter_unsupported, "");
{ { size_t const skippableSize = skippableHeaderSize + sizeU32;
size_t const skippableSize = skippableHeaderSize + sizeU32;
RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, ""); RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
return skippableSize; return skippableSize;
} }
} }
/*! ZSTD_readSkippableFrame() : /*! ZSTD_readSkippableFrame() :
* Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer. * Retrieves content of a skippable frame, and writes it to dst buffer.
* *
* The parameter magicVariant will receive the magicVariant that was supplied when the frame was written, * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
* i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested
* in the magicVariant. * in the magicVariant.
* *
* Returns an error if destination buffer is not large enough, or if the frame is not skippable. * Returns an error if destination buffer is not large enough, or if this is not a valid skippable frame.
* *
* @return : number of bytes written or a ZSTD error. * @return : number of bytes written or a ZSTD error.
*/ */
ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant, size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity,
const void* src, size_t srcSize) unsigned* magicVariant, /* optional, can be NULL */
const void* src, size_t srcSize)
{ {
U32 const magicNumber = MEM_readLE32(src); RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");
size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
/* check input validity */ { U32 const magicNumber = MEM_readLE32(src);
RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, ""); size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, ""); size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
/* deliver payload */ /* check input validity */
if (skippableContentSize > 0 && dst != NULL) RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize); RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
if (magicVariant != NULL) RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
*magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
return skippableContentSize; /* deliver payload */
if (skippableContentSize > 0 && dst != NULL)
ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
if (magicVariant != NULL)
*magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
return skippableContentSize;
}
} }
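A hedged caller-side sketch of the reworked reader (payload buffer and capacity are illustrative; the signature is the one shown above):

unsigned magicVariant;
size_t const contentSize = ZSTD_readSkippableFrame(payload, payloadCapacity,
                                                   &magicVariant, src, srcSize);
if (!ZSTD_isError(contentSize)) {
    /* payload[0..contentSize) holds the user data;
     * magicVariant == magicNumber - ZSTD_MAGIC_SKIPPABLE_START (0..15) */
}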
/* ZSTD_findDecompressedSize() : /* ZSTD_findDecompressedSize() :
* compatible with legacy mode
* `srcSize` must be the exact length of some number of ZSTD compressed and/or * `srcSize` must be the exact length of some number of ZSTD compressed and/or
* skippable frames * skippable frames
* @return : decompressed size of the frames contained */ * note: compatible with legacy mode
* @return : decompressed size of the frames contained */
unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
{ {
unsigned long long totalDstSize = 0; unsigned long long totalDstSize = 0;
@ -592,9 +622,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
size_t const skippableSize = readSkippableFrameSize(src, srcSize); size_t const skippableSize = readSkippableFrameSize(src, srcSize);
if (ZSTD_isError(skippableSize)) { if (ZSTD_isError(skippableSize)) return ZSTD_CONTENTSIZE_ERROR;
return ZSTD_CONTENTSIZE_ERROR;
}
assert(skippableSize <= srcSize); assert(skippableSize <= srcSize);
src = (const BYTE *)src + skippableSize; src = (const BYTE *)src + skippableSize;
@ -602,17 +630,17 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
continue; continue;
} }
{ unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); { unsigned long long const fcs = ZSTD_getFrameContentSize(src, srcSize);
if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret; if (fcs >= ZSTD_CONTENTSIZE_ERROR) return fcs;
/* check for overflow */ if (totalDstSize + fcs < totalDstSize)
if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR; return ZSTD_CONTENTSIZE_ERROR; /* check for overflow */
totalDstSize += ret; totalDstSize += fcs;
} }
/* skip to next frame */
{ size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
if (ZSTD_isError(frameSrcSize)) { if (ZSTD_isError(frameSrcSize)) return ZSTD_CONTENTSIZE_ERROR;
return ZSTD_CONTENTSIZE_ERROR; assert(frameSrcSize <= srcSize);
}
src = (const BYTE *)src + frameSrcSize; src = (const BYTE *)src + frameSrcSize;
srcSize -= frameSrcSize; srcSize -= frameSrcSize;
@ -730,10 +758,11 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
ip += 4; ip += 4;
} }
frameSizeInfo.nbBlocks = nbBlocks;
frameSizeInfo.compressedSize = (size_t)(ip - ipstart); frameSizeInfo.compressedSize = (size_t)(ip - ipstart);
frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
? zfh.frameContentSize ? zfh.frameContentSize
: nbBlocks * zfh.blockSizeMax; : (unsigned long long)nbBlocks * zfh.blockSizeMax;
return frameSizeInfo; return frameSizeInfo;
} }
} }
@ -773,6 +802,48 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
return bound; return bound;
} }
size_t ZSTD_decompressionMargin(void const* src, size_t srcSize)
{
size_t margin = 0;
unsigned maxBlockSize = 0;
/* Iterate over each frame */
while (srcSize > 0) {
ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
size_t const compressedSize = frameSizeInfo.compressedSize;
unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
ZSTD_frameHeader zfh;
FORWARD_IF_ERROR(ZSTD_getFrameHeader(&zfh, src, srcSize), "");
if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
return ERROR(corruption_detected);
if (zfh.frameType == ZSTD_frame) {
/* Add the frame header to our margin */
margin += zfh.headerSize;
/* Add the checksum to our margin */
margin += zfh.checksumFlag ? 4 : 0;
/* Add 3 bytes per block */
margin += 3 * frameSizeInfo.nbBlocks;
/* Compute the max block size */
maxBlockSize = MAX(maxBlockSize, zfh.blockSizeMax);
} else {
assert(zfh.frameType == ZSTD_skippableFrame);
/* Add the entire skippable frame size to our margin. */
margin += compressedSize;
}
assert(srcSize >= compressedSize);
src = (const BYTE*)src + compressedSize;
srcSize -= compressedSize;
}
/* Add the max block size back to the margin. */
margin += maxBlockSize;
return margin;
}
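ZSTD_decompressionMargin() is new here; the margin it computes matches what upstream zstd documents for in-place decompression, where the compressed input is parked at the tail of the output buffer. A hedged sketch of that layout (buffer names and sizes are illustrative; verify the exact contract against zstd.h before relying on it):

size_t const margin = ZSTD_decompressionMargin(src, srcSize);
if (!ZSTD_isError(margin)) {
    size_t const bufferSize = decompressedSize + margin;         /* decompressedSize known by caller */
    ZSTD_memcpy(buffer + bufferSize - srcSize, src, srcSize);    /* input placed at the end */
    /* decompress with dst == buffer, dstCapacity == decompressedSize,
     * src == buffer + bufferSize - srcSize: the margin keeps the output
     * from overtaking the not-yet-consumed input */
}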
/*-************************************************************* /*-*************************************************************
* Frame decoding * Frame decoding
@ -930,6 +1001,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
} }
ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0); ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0);
/* Allow caller to get size read */ /* Allow caller to get size read */
DEBUGLOG(4, "ZSTD_decompressFrame: decompressed frame of size %zi, consuming %zi bytes of input", op-ostart, ip - (const BYTE*)*srcPtr);
*srcPtr = ip; *srcPtr = ip;
*srcSizePtr = remainingSrcSize; *srcSizePtr = remainingSrcSize;
return (size_t)(op-ostart); return (size_t)(op-ostart);
@ -955,17 +1027,18 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
while (srcSize >= ZSTD_startingInputLength(dctx->format)) { while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
{ U32 const magicNumber = MEM_readLE32(src); if (srcSize >= 4) {
DEBUGLOG(4, "reading magic number %08X (expecting %08X)", U32 const magicNumber = MEM_readLE32(src);
(unsigned)magicNumber, ZSTD_MAGICNUMBER); DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber);
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
/* skippable frame detected : skip it */
size_t const skippableSize = readSkippableFrameSize(src, srcSize); size_t const skippableSize = readSkippableFrameSize(src, srcSize);
FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed"); FORWARD_IF_ERROR(skippableSize, "invalid skippable frame");
assert(skippableSize <= srcSize); assert(skippableSize <= srcSize);
src = (const BYTE *)src + skippableSize; src = (const BYTE *)src + skippableSize;
srcSize -= skippableSize; srcSize -= skippableSize;
continue; continue; /* check next frame */
} } } }
if (ddict) { if (ddict) {
@ -1061,8 +1134,8 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr
size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; }
/* /*
* Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed, * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed, we
* we allow taking a partial block as the input. Currently only raw uncompressed blocks can * allow taking a partial block as the input. Currently only raw uncompressed blocks can
* be streamed. * be streamed.
* *
* For blocks that can be streamed, this allows us to reduce the latency until we produce * For blocks that can be streamed, this allows us to reduce the latency until we produce
@ -1262,7 +1335,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
default: default:
assert(0); /* impossible */ assert(0); /* impossible */
RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ RETURN_ERROR(GENERIC, "impossible to reach"); /* some compilers require default to do something */
} }
} }
@ -1303,11 +1376,11 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
/* in minimal huffman, we always use X1 variants */ /* in minimal huffman, we always use X1 variants */
size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable, size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable,
dictPtr, dictEnd - dictPtr, dictPtr, dictEnd - dictPtr,
workspace, workspaceSize); workspace, workspaceSize, /* flags */ 0);
#else #else
size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
dictPtr, (size_t)(dictEnd - dictPtr), dictPtr, (size_t)(dictEnd - dictPtr),
workspace, workspaceSize); workspace, workspaceSize, /* flags */ 0);
#endif #endif
RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, "");
dictPtr += hSize; dictPtr += hSize;
@ -1403,7 +1476,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
dctx->prefixStart = NULL; dctx->prefixStart = NULL;
dctx->virtualStart = NULL; dctx->virtualStart = NULL;
dctx->dictEnd = NULL; dctx->dictEnd = NULL;
dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ dctx->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */
dctx->litEntropy = dctx->fseEntropy = 0; dctx->litEntropy = dctx->fseEntropy = 0;
dctx->dictID = 0; dctx->dictID = 0;
dctx->bType = bt_reserved; dctx->bType = bt_reserved;
@ -1465,7 +1538,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
* This could for one of the following reasons : * This could for one of the following reasons :
* - The frame does not require a dictionary (most common case). * - The frame does not require a dictionary (most common case).
* - The frame was built with dictID intentionally removed. * - The frame was built with dictID intentionally removed.
* Needed dictionary is a hidden information. * Needed dictionary is a hidden piece of information.
* Note : this use case also happens when using a non-conformant dictionary. * Note : this use case also happens when using a non-conformant dictionary.
* - `srcSize` is too small, and as a result, frame header could not be decoded. * - `srcSize` is too small, and as a result, frame header could not be decoded.
* Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`. * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`.
@ -1474,7 +1547,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
* ZSTD_getFrameHeader(), which will provide a more precise error code. */ * ZSTD_getFrameHeader(), which will provide a more precise error code. */
unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
{ {
ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 }; ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0, 0, 0 };
size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize); size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
if (ZSTD_isError(hError)) return 0; if (ZSTD_isError(hError)) return 0;
return zfp.dictID; return zfp.dictID;
@ -1581,7 +1654,9 @@ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t di
size_t ZSTD_initDStream(ZSTD_DStream* zds) size_t ZSTD_initDStream(ZSTD_DStream* zds)
{ {
DEBUGLOG(4, "ZSTD_initDStream"); DEBUGLOG(4, "ZSTD_initDStream");
return ZSTD_initDStream_usingDDict(zds, NULL); FORWARD_IF_ERROR(ZSTD_DCtx_reset(zds, ZSTD_reset_session_only), "");
FORWARD_IF_ERROR(ZSTD_DCtx_refDDict(zds, NULL), "");
return ZSTD_startingInputLength(zds->format);
} }
/* ZSTD_initDStream_usingDDict() : /* ZSTD_initDStream_usingDDict() :
@ -1589,6 +1664,7 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds)
* this function cannot fail */ * this function cannot fail */
size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
{ {
DEBUGLOG(4, "ZSTD_initDStream_usingDDict");
FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , ""); FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , "");
FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , ""); FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , "");
return ZSTD_startingInputLength(dctx->format); return ZSTD_startingInputLength(dctx->format);
@ -1599,6 +1675,7 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
* this function cannot fail */ * this function cannot fail */
size_t ZSTD_resetDStream(ZSTD_DStream* dctx) size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
{ {
DEBUGLOG(4, "ZSTD_resetDStream");
FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), "");
return ZSTD_startingInputLength(dctx->format); return ZSTD_startingInputLength(dctx->format);
} }
@ -1670,6 +1747,11 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict; bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict;
bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts; bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts;
return bounds; return bounds;
case ZSTD_d_disableHuffmanAssembly:
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
default:; default:;
} }
bounds.error = ERROR(parameter_unsupported); bounds.error = ERROR(parameter_unsupported);
@ -1710,6 +1792,9 @@ size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value
case ZSTD_d_refMultipleDDicts: case ZSTD_d_refMultipleDDicts:
*value = (int)dctx->refMultipleDDicts; *value = (int)dctx->refMultipleDDicts;
return 0; return 0;
case ZSTD_d_disableHuffmanAssembly:
*value = (int)dctx->disableHufAsm;
return 0;
default:; default:;
} }
RETURN_ERROR(parameter_unsupported, ""); RETURN_ERROR(parameter_unsupported, "");
@ -1743,6 +1828,10 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value
} }
dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value; dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value;
return 0; return 0;
case ZSTD_d_disableHuffmanAssembly:
CHECK_DBOUNDS(ZSTD_d_disableHuffmanAssembly, value);
dctx->disableHufAsm = value != 0;
return 0;
default:; default:;
} }
RETURN_ERROR(parameter_unsupported, ""); RETURN_ERROR(parameter_unsupported, "");
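The new ZSTD_d_disableHuffmanAssembly knob is a plain 0/1 DCtx parameter; a hedged caller-side sketch (context setup elided):

/* opt out of the assembly Huffman decoder for this context */
size_t const err = ZSTD_DCtx_setParameter(dctx, ZSTD_d_disableHuffmanAssembly, 1);
if (ZSTD_isError(err)) {
    /* out-of-range value, or a build where the parameter is unsupported */
}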
@ -1918,7 +2007,6 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
if (zds->refMultipleDDicts && zds->ddictSet) { if (zds->refMultipleDDicts && zds->ddictSet) {
ZSTD_DCtx_selectFrameDDict(zds); ZSTD_DCtx_selectFrameDDict(zds);
} }
DEBUGLOG(5, "header size : %u", (U32)hSize);
if (ZSTD_isError(hSize)) { if (ZSTD_isError(hSize)) {
return hSize; /* error */ return hSize; /* error */
} }
@ -1932,6 +2020,11 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
zds->lhSize += remainingInput; zds->lhSize += remainingInput;
} }
input->pos = input->size; input->pos = input->size;
/* check first few bytes */
FORWARD_IF_ERROR(
ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format),
"First few bytes detected incorrect" );
/* return hint input size */
return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
} }
assert(ip != NULL); assert(ip != NULL);
@ -1949,8 +2042,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds));
if (ZSTD_isError(decompressedSize)) return decompressedSize; if (ZSTD_isError(decompressedSize)) return decompressedSize;
DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
assert(istart != NULL);
ip = istart + cSize; ip = istart + cSize;
op += decompressedSize; op = op ? op + decompressedSize : op; /* can occur if frameContentSize = 0 (empty frame) */
zds->expected = 0; zds->expected = 0;
zds->streamStage = zdss_init; zds->streamStage = zdss_init;
someMoreWork = 0; someMoreWork = 0;
@ -2034,6 +2128,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
} }
if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */
FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), "");
assert(ip != NULL);
ip += neededInSize; ip += neededInSize;
/* Function modifies the stage so we must break */ /* Function modifies the stage so we must break */
break; break;
@ -2048,7 +2143,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
int const isSkipFrame = ZSTD_isSkipFrame(zds); int const isSkipFrame = ZSTD_isSkipFrame(zds);
size_t loadedSize; size_t loadedSize;
/* At this point we shouldn't be decompressing a block that we can stream. */ /* At this point we shouldn't be decompressing a block that we can stream. */
assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip)); assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip)));
if (isSkipFrame) { if (isSkipFrame) {
loadedSize = MIN(toLoad, (size_t)(iend-ip)); loadedSize = MIN(toLoad, (size_t)(iend-ip));
} else { } else {
@ -2057,8 +2152,11 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
"should never happen"); "should never happen");
loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip)); loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip));
} }
ip += loadedSize; if (loadedSize != 0) {
zds->inPos += loadedSize; /* ip may be NULL */
ip += loadedSize;
zds->inPos += loadedSize;
}
if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */
/* decode loaded input */ /* decode loaded input */
@ -2068,14 +2166,17 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
break; break;
} }
case zdss_flush: case zdss_flush:
{ size_t const toFlushSize = zds->outEnd - zds->outStart; {
size_t const toFlushSize = zds->outEnd - zds->outStart;
size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize); size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize);
op += flushedSize;
op = op ? op + flushedSize : op;
zds->outStart += flushedSize; zds->outStart += flushedSize;
if (flushedSize == toFlushSize) { /* flush completed */ if (flushedSize == toFlushSize) { /* flush completed */
zds->streamStage = zdss_read; zds->streamStage = zdss_read;
if ( (zds->outBuffSize < zds->fParams.frameContentSize) if ( (zds->outBuffSize < zds->fParams.frameContentSize)
&& (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) {
DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)", DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)",
(int)(zds->outBuffSize - zds->outStart), (int)(zds->outBuffSize - zds->outStart),
(U32)zds->fParams.blockSizeMax); (U32)zds->fParams.blockSizeMax);
@ -2089,7 +2190,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
default: default:
assert(0); /* impossible */ assert(0); /* impossible */
RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ RETURN_ERROR(GENERIC, "impossible to reach"); /* some compilers require default to do something */
} } } }
/* result */ /* result */
@ -2102,8 +2203,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
if ((ip==istart) && (op==ostart)) { /* no forward progress */ if ((ip==istart) && (op==ostart)) { /* no forward progress */
zds->noForwardProgress ++; zds->noForwardProgress ++;
if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
RETURN_ERROR_IF(op==oend, dstSize_tooSmall, ""); RETURN_ERROR_IF(op==oend, noForwardProgress_destFull, "");
RETURN_ERROR_IF(ip==iend, srcSize_wrong, ""); RETURN_ERROR_IF(ip==iend, noForwardProgress_inputEmpty, "");
assert(0); assert(0);
} }
} else { } else {
@ -2140,11 +2241,17 @@ size_t ZSTD_decompressStream_simpleArgs (
void* dst, size_t dstCapacity, size_t* dstPos, void* dst, size_t dstCapacity, size_t* dstPos,
const void* src, size_t srcSize, size_t* srcPos) const void* src, size_t srcSize, size_t* srcPos)
{ {
ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; ZSTD_outBuffer output;
ZSTD_inBuffer input = { src, srcSize, *srcPos }; ZSTD_inBuffer input;
/* ZSTD_compress_generic() will check validity of dstPos and srcPos */ output.dst = dst;
size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); output.size = dstCapacity;
*dstPos = output.pos; output.pos = *dstPos;
*srcPos = input.pos; input.src = src;
return cErr; input.size = srcSize;
input.pos = *srcPos;
{ size_t const cErr = ZSTD_decompressStream(dctx, &output, &input);
*dstPos = output.pos;
*srcPos = input.pos;
return cErr;
}
} }
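Since the simple-args wrapper now fills the ZSTD_outBuffer/ZSTD_inBuffer pair field by field, its caller contract is unchanged; a hedged usage sketch (loop structure and buffer names illustrative):

size_t dstPos = 0, srcPos = 0;
while (srcPos < srcSize) {
    size_t const ret = ZSTD_decompressStream_simpleArgs(dctx,
                           dst, dstCapacity, &dstPos,
                           src, srcSize, &srcPos);
    if (ZSTD_isError(ret))
        break;   /* includes the new noForwardProgress_destFull/_inputEmpty codes */
    if (ret == 0)
        break;   /* frame fully decoded and flushed */
}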

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -20,12 +21,12 @@
#include "../common/mem.h" /* low level memory routines */ #include "../common/mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY #define FSE_STATIC_LINKING_ONLY
#include "../common/fse.h" #include "../common/fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h" #include "../common/huf.h"
#include "../common/zstd_internal.h" #include "../common/zstd_internal.h"
#include "zstd_decompress_internal.h" /* ZSTD_DCtx */ #include "zstd_decompress_internal.h" /* ZSTD_DCtx */
#include "zstd_ddict.h" /* ZSTD_DDictDictContent */ #include "zstd_ddict.h" /* ZSTD_DDictDictContent */
#include "zstd_decompress_block.h" #include "zstd_decompress_block.h"
#include "../common/bits.h" /* ZSTD_highbit32 */
/*_******************************************************* /*_*******************************************************
* Macros * Macros
@ -89,7 +90,7 @@ static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const
dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE; dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
} }
else { else {
/* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */ /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */
dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize; dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize; dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
} }
@ -134,13 +135,16 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
ZSTD_FALLTHROUGH; ZSTD_FALLTHROUGH;
case set_compressed: case set_compressed:
RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3");
{ size_t lhSize, litSize, litCSize; { size_t lhSize, litSize, litCSize;
U32 singleStream=0; U32 singleStream=0;
U32 const lhlCode = (istart[0] >> 2) & 3; U32 const lhlCode = (istart[0] >> 2) & 3;
U32 const lhc = MEM_readLE32(istart); U32 const lhc = MEM_readLE32(istart);
size_t hufSuccess; size_t hufSuccess;
size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
int const flags = 0
| (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
| (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
switch(lhlCode) switch(lhlCode)
{ {
case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
@ -165,6 +169,10 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
} }
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
if (!singleStream)
RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
"Not enough literals (%zu) for the 4-streams mode (min %u)",
litSize, MIN_LITERALS_FOR_4_STREAMS);
RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, ""); RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0); ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
@ -176,13 +184,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
if (litEncType==set_repeat) { if (litEncType==set_repeat) {
if (singleStream) { if (singleStream) {
hufSuccess = HUF_decompress1X_usingDTable_bmi2( hufSuccess = HUF_decompress1X_usingDTable(
dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->litBuffer, litSize, istart+lhSize, litCSize,
dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx)); dctx->HUFptr, flags);
} else { } else {
hufSuccess = HUF_decompress4X_usingDTable_bmi2( assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
hufSuccess = HUF_decompress4X_usingDTable(
dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->litBuffer, litSize, istart+lhSize, litCSize,
dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx)); dctx->HUFptr, flags);
} }
} else { } else {
if (singleStream) { if (singleStream) {
@ -190,18 +199,18 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
hufSuccess = HUF_decompress1X_DCtx_wksp( hufSuccess = HUF_decompress1X_DCtx_wksp(
dctx->entropy.hufTable, dctx->litBuffer, litSize, dctx->entropy.hufTable, dctx->litBuffer, litSize,
istart+lhSize, litCSize, dctx->workspace, istart+lhSize, litCSize, dctx->workspace,
sizeof(dctx->workspace)); sizeof(dctx->workspace), flags);
#else #else
hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( hufSuccess = HUF_decompress1X1_DCtx_wksp(
dctx->entropy.hufTable, dctx->litBuffer, litSize, dctx->entropy.hufTable, dctx->litBuffer, litSize,
istart+lhSize, litCSize, dctx->workspace, istart+lhSize, litCSize, dctx->workspace,
sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); sizeof(dctx->workspace), flags);
#endif #endif
} else { } else {
hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( hufSuccess = HUF_decompress4X_hufOnly_wksp(
dctx->entropy.hufTable, dctx->litBuffer, litSize, dctx->entropy.hufTable, dctx->litBuffer, litSize,
istart+lhSize, litCSize, dctx->workspace, istart+lhSize, litCSize, dctx->workspace,
sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); sizeof(dctx->workspace), flags);
} }
} }
if (dctx->litBufferLocation == ZSTD_split) if (dctx->litBufferLocation == ZSTD_split)
@ -237,6 +246,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
break; break;
case 3: case 3:
lhSize = 3; lhSize = 3;
RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3");
litSize = MEM_readLE24(istart) >> 4; litSize = MEM_readLE24(istart) >> 4;
break; break;
} }
@ -279,12 +289,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
break; break;
case 1: case 1:
lhSize = 2; lhSize = 2;
RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3");
litSize = MEM_readLE16(istart) >> 4; litSize = MEM_readLE16(istart) >> 4;
break; break;
case 3: case 3:
lhSize = 3; lhSize = 3;
RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4");
litSize = MEM_readLE24(istart) >> 4; litSize = MEM_readLE24(istart) >> 4;
RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
break; break;
} }
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
@ -506,14 +517,15 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
for (i = 8; i < n; i += 8) { for (i = 8; i < n; i += 8) {
MEM_write64(spread + pos + i, sv); MEM_write64(spread + pos + i, sv);
} }
pos += n; assert(n>=0);
pos += (size_t)n;
} }
} }
/* Now we spread those positions across the table. /* Now we spread those positions across the table.
* The benefit of doing it in two stages is that we avoid the the * The benefit of doing it in two stages is that we avoid the
* variable size inner loop, which caused lots of branch misses. * variable size inner loop, which caused lots of branch misses.
* Now we can run through all the positions without any branch misses. * Now we can run through all the positions without any branch misses.
* We unroll the loop twice, since that is what emperically worked best. * We unroll the loop twice, since that is what empirically worked best.
*/ */
{ {
size_t position = 0; size_t position = 0;
@ -540,7 +552,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
for (i=0; i<n; i++) { for (i=0; i<n; i++) {
tableDecode[position].baseValue = s; tableDecode[position].baseValue = s;
position = (position + step) & tableMask; position = (position + step) & tableMask;
while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */ while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask; /* lowprob area */
} } } }
assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
} }
@ -551,7 +563,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
for (u=0; u<tableSize; u++) { for (u=0; u<tableSize; u++) {
U32 const symbol = tableDecode[u].baseValue; U32 const symbol = tableDecode[u].baseValue;
U32 const nextState = symbolNext[symbol]++; U32 const nextState = symbolNext[symbol]++;
tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) ); tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize); tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
assert(nbAdditionalBits[symbol] < 255); assert(nbAdditionalBits[symbol] < 255);
tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol]; tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
@ -964,6 +976,11 @@ size_t ZSTD_execSequence(BYTE* op,
assert(op != NULL /* Precondition */); assert(op != NULL /* Precondition */);
assert(oend_w < oend /* No underflow */); assert(oend_w < oend /* No underflow */);
#if defined(__aarch64__)
/* prefetch sequence starting from match that will be used for copy later */
PREFETCH_L1(match);
#endif
/* Handle edge cases in a slow path: /* Handle edge cases in a slow path:
* - Read beyond end of literals * - Read beyond end of literals
* - Match end is within WILDCOPY_OVERLIMIT of oend * - Match end is within WILDCOPY_OVERLIMIT of oend
@ -1154,7 +1171,7 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16
} }
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
* offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) * offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32
* bits before reloading. This value is the maximum number of bytes we read * bits before reloading. This value is the maximum number of bytes we read
* after reloading when we are decoding long offsets. * after reloading when we are decoding long offsets.
*/ */
@ -1169,9 +1186,27 @@ FORCE_INLINE_TEMPLATE seq_t
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
{ {
seq_t seq; seq_t seq;
/*
* ZSTD_seqSymbol is a structure with a total of 64 bits wide. So it can be
* loaded in one operation and extracted its fields by simply shifting or
* bit-extracting on aarch64.
* GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh
* operations that cause performance drop. This can be avoided by using this
* ZSTD_memcpy hack.
*/
#if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__))
ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS;
ZSTD_seqSymbol* const llDInfo = &llDInfoS;
ZSTD_seqSymbol* const mlDInfo = &mlDInfoS;
ZSTD_seqSymbol* const ofDInfo = &ofDInfoS;
ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol));
ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol));
ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol));
#else
const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state; const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state; const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state; const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
#endif
seq.matchLength = mlDInfo->baseValue; seq.matchLength = mlDInfo->baseValue;
seq.litLength = llDInfo->baseValue; seq.litLength = llDInfo->baseValue;
{ U32 const ofBase = ofDInfo->baseValue; { U32 const ofBase = ofDInfo->baseValue;
@ -1186,28 +1221,31 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
U32 const llnbBits = llDInfo->nbBits; U32 const llnbBits = llDInfo->nbBits;
U32 const mlnbBits = mlDInfo->nbBits; U32 const mlnbBits = mlDInfo->nbBits;
U32 const ofnbBits = ofDInfo->nbBits; U32 const ofnbBits = ofDInfo->nbBits;
assert(llBits <= MaxLLBits);
assert(mlBits <= MaxMLBits);
assert(ofBits <= MaxOff);
/* /*
* As gcc has better branch and block analyzers, sometimes it is only * As gcc has better branch and block analyzers, sometimes it is only
* valuable to mark likelyness for clang, it gives around 3-4% of * valuable to mark likeliness for clang, it gives around 3-4% of
* performance. * performance.
*/ */
/* sequence */ /* sequence */
{ size_t offset; { size_t offset;
#if defined(__clang__)
if (LIKELY(ofBits > 1)) {
#else
if (ofBits > 1) { if (ofBits > 1) {
#endif
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
assert(ofBits <= MaxOff); ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);
if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); /* Always read extra bits, this keeps the logic simple,
* avoids branches, and avoids accidentally reading 0 bits.
*/
U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;
offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
BIT_reloadDStream(&seqState->DStream); BIT_reloadDStream(&seqState->DStream);
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); offset += BIT_readBitsFast(&seqState->DStream, extraBits);
assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
} else { } else {
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
@ -1232,11 +1270,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
seq.offset = offset; seq.offset = offset;
} }
#if defined(__clang__)
if (UNLIKELY(mlBits > 0))
#else
if (mlBits > 0) if (mlBits > 0)
#endif
seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
@ -1246,11 +1280,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
/* Ensure there are enough bits to read the rest of data in 64-bit mode. */ /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
#if defined(__clang__)
if (UNLIKELY(llBits > 0))
#else
if (llBits > 0) if (llBits > 0)
#endif
seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
if (MEM_32bits()) if (MEM_32bits())
@ -1552,7 +1582,7 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart); const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
const BYTE* const vBase = (const BYTE*)(dctx->virtualStart); const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd); const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
DEBUGLOG(5, "ZSTD_decompressSequences_body"); DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
(void)frame; (void)frame;
/* Regen sequences */ /* Regen sequences */
@ -1945,34 +1975,79 @@ ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
/*
* @returns The total size of the history referenceable by zstd, including
* both the prefix and the extDict. At @p op any offset larger than this
* is invalid.
*/
static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart)
{
return (size_t)(op - virtualStart);
}
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ typedef struct {
!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) unsigned longOffsetShare;
/* ZSTD_getLongOffsetsShare() : unsigned maxNbAdditionalBits;
} ZSTD_OffsetInfo;
/* ZSTD_getOffsetInfo() :
* condition : offTable must be valid * condition : offTable must be valid
* @return : "share" of long offsets (arbitrarily defined as > (1<<23)) * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
* compared to maximum possible of (1<<OffFSELog) */ * compared to maximum possible of (1<<OffFSELog),
static unsigned * as well as the maximum number additional bits required.
ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable) */
static ZSTD_OffsetInfo
ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq)
{ {
const void* ptr = offTable; ZSTD_OffsetInfo info = {0, 0};
U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog; /* If nbSeq == 0, then the offTable is uninitialized, but we have
const ZSTD_seqSymbol* table = offTable + 1; * no sequences, so both values should be 0.
U32 const max = 1 << tableLog; */
U32 u, total = 0; if (nbSeq != 0) {
DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog); const void* ptr = offTable;
U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
const ZSTD_seqSymbol* table = offTable + 1;
U32 const max = 1 << tableLog;
U32 u;
DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
assert(max <= (1 << OffFSELog)); /* max not too large */ assert(max <= (1 << OffFSELog)); /* max not too large */
for (u=0; u<max; u++) { for (u=0; u<max; u++) {
if (table[u].nbAdditionalBits > 22) total += 1; info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits);
if (table[u].nbAdditionalBits > 22) info.longOffsetShare += 1;
}
assert(tableLog <= OffFSELog);
info.longOffsetShare <<= (OffFSELog - tableLog); /* scale to OffFSELog */
} }
assert(tableLog <= OffFSELog); return info;
total <<= (OffFSELog - tableLog); /* scale to OffFSELog */ }
return total; /*
* @returns The maximum offset we can decode in one read of our bitstream, without
* reloading more bits in the middle of the offset bits read. Any offsets larger
* than this must use the long offset decoder.
*/
static size_t ZSTD_maxShortOffset(void)
{
if (MEM_64bits()) {
/* We can decode any offset without reloading bits.
* This might change if the max window size grows.
*/
ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
return (size_t)-1;
} else {
/* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1.
* This offBase would require STREAM_ACCUMULATOR_MIN extra bits.
* Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset.
*/
size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1;
size_t const maxOffset = maxOffbase - ZSTD_REP_NUM;
assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN);
return maxOffset;
}
} }
#endif
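To make the 32-bit threshold above concrete, a hedged worked example in comment form (assuming the customary constants STREAM_ACCUMULATOR_MIN_32 == 25 and ZSTD_REP_NUM == 3; check bitstream.h and zstd_internal.h for the values in this tree):

/* maxOffbase = (1u << (25 + 1)) - 1 = 67108863
 * maxOffset  = 67108863 - 3        = 67108860   (just under 64 MiB)
 * so on 32-bit hosts, only matches reaching back roughly 64 MiB or more
 * have to take the long-offset decoding path. */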
size_t size_t
ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
@ -1980,20 +2055,21 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
const void* src, size_t srcSize, const int frame, const streaming_operation streaming) const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
{ /* blockType == blockCompressed */ { /* blockType == blockCompressed */
const BYTE* ip = (const BYTE*)src; const BYTE* ip = (const BYTE*)src;
/* isLongOffset must be true if there are long offsets.
* Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
* We don't expect that to be the case in 64-bit mode.
* In block mode, window size is not known, so we have to be conservative.
* (note: but it could be evaluated from current-lowLimit)
*/
ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); /* Note : the wording of the specification
* allows compressed block to be sized exactly ZSTD_BLOCKSIZE_MAX.
* This generally does not happen, as it makes little sense,
* since an uncompressed block would feature same size and have no decompression cost.
* Also, note that decoder from reference libzstd before < v1.5.4
* would consider this edge case as an error.
* As a consequence, avoid generating compressed blocks of size ZSTD_BLOCKSIZE_MAX
* for broader compatibility with the deployed ecosystem of zstd decoders */
RETURN_ERROR_IF(srcSize > ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
/* Decode literals section */ /* Decode literals section */
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming); { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize); DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize);
if (ZSTD_isError(litCSize)) return litCSize; if (ZSTD_isError(litCSize)) return litCSize;
ip += litCSize; ip += litCSize;
srcSize -= litCSize; srcSize -= litCSize;
@ -2001,6 +2077,23 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
/* Build Decoding Tables */ /* Build Decoding Tables */
{ {
/* Compute the maximum block size, which must also work when !frame and fParams are unset.
* Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
*/
size_t const blockSizeMax = MIN(dstCapacity, (frame ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX));
size_t const totalHistorySize = ZSTD_totalHistorySize((BYTE*)dst + blockSizeMax, (BYTE const*)dctx->virtualStart);
/* isLongOffset must be true if there are long offsets.
* Offsets are long if they are larger than ZSTD_maxShortOffset().
* We don't expect that to be the case in 64-bit mode.
*
* We check here to see if our history is large enough to allow long offsets.
             * If it isn't, then we can't possibly have (valid) long offsets. If the offset
             * is invalid, then it is okay to read it incorrectly.
             *
             * If isLongOffset is true, then we will later check our decoding table to see
* if it is even possible to generate long offsets.
*/
ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset()));
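            /* Illustrative note, not part of this change: on 32-bit builds ZSTD_maxShortOffset() is
             * roughly 64 MiB (2^26 - 1 - ZSTD_REP_NUM), so isLongOffset can only be set once more than
             * about 64 MiB of history is reachable from this block, e.g. with windowLog >= 27;
             * on 64-bit builds it is never set here. */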
/* These macros control at build-time which decompressor implementation /* These macros control at build-time which decompressor implementation
* we use. If neither is defined, we do some inspection and dispatch at * we use. If neither is defined, we do some inspection and dispatch at
* runtime. * runtime.
@ -2008,6 +2101,11 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
int usePrefetchDecoder = dctx->ddictIsCold; int usePrefetchDecoder = dctx->ddictIsCold;
#else
/* Set to 1 to avoid computing offset info if we don't need to.
* Otherwise this value is ignored.
*/
int usePrefetchDecoder = 1;
#endif #endif
int nbSeq; int nbSeq;
size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
@ -2015,28 +2113,42 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
ip += seqHSize; ip += seqHSize;
srcSize -= seqHSize; srcSize -= seqHSize;
RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall,
"invalid dst");
            /* If we could potentially have long offsets, or we might want to use the prefetch decoder,
             * compute information about the share of long offsets, and the maximum nbAdditionalBits.
             * NOTE: could probably use a larger nbSeq limit
             */
            if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) {
                ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq);
                if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) {
                    /* If isLongOffset, but the maximum number of additional bits that we see in our
                     * table is small enough, then we know it is impossible to have too long an offset
                     * in this block, so we can use the regular offset decoder.
                     */
                    isLongOffset = ZSTD_lo_isRegularOffset;
                }
                if (!usePrefetchDecoder) {
                    U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
                    usePrefetchDecoder = (info.longOffsetShare >= minShare);
                }
            }
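            /* Illustrative note, not part of this change: info.longOffsetShare is scaled to OffFSELog (8),
             * i.e. expressed out of 256 table states, so the minShare thresholds of 7 and 20 correspond to
             * roughly 7/256 = 2.73% and 20/256 = 7.81% of offset states needing more than 22 extra bits. */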
dctx->ddictIsCold = 0; dctx->ddictIsCold = 0;
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
if (usePrefetchDecoder) if (usePrefetchDecoder) {
#else
(void)usePrefetchDecoder;
{
#endif #endif
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
#endif #endif
}
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
/* else */ /* else */
@ -2060,9 +2172,9 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
} }
size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity, void* dst, size_t dstCapacity,
const void* src, size_t srcSize) const void* src, size_t srcSize)
{ {
size_t dSize; size_t dSize;
ZSTD_checkContinuity(dctx, dst, dstCapacity); ZSTD_checkContinuity(dctx, dst, dstCapacity);
@ -2070,3 +2182,12 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
dctx->previousDstEnd = (char*)dst + dSize; dctx->previousDstEnd = (char*)dst + dSize;
return dSize; return dSize;
} }
/* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */
size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize)
{
return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize);
}
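The wrapper above exists so that internal callers can reach the block decoder through ZSTD_decompressBlock_deprecated() without tripping deprecation warnings on the public symbol. Below is a minimal sketch of that pattern with hypothetical names (old_api and old_api_internal are not from this patch), assuming a GCC/Clang-style deprecated attribute.

#include <stdio.h>

/* Public symbol: marked deprecated so external callers get a warning when they use it. */
__attribute__((deprecated("use the streaming API instead")))
int old_api(int x);

/* Internal alias: same behavior, no attribute, safe for in-tree callers. */
static int old_api_internal(int x)
{
    return x + 1;
}

/* The public symbol must just wrap the internal one. */
int old_api(int x)
{
    return old_api_internal(x);
}

int main(void)
{
    printf("%d\n", old_api_internal(41));  /* internal call: no deprecation warning */
    return 0;
}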

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -64,5 +65,10 @@ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
unsigned tableLog, void* wksp, size_t wkspSize, unsigned tableLog, void* wksp, size_t wkspSize,
int bmi2); int bmi2);
/* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */
size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
#endif /* ZSTD_DEC_BLOCK_H */ #endif /* ZSTD_DEC_BLOCK_H */

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -75,12 +76,13 @@ static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32)) #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
#define ZSTD_HUFFDTABLE_CAPACITY_LOG 12
typedef struct { typedef struct {
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */
U32 rep[ZSTD_REP_NUM]; U32 rep[ZSTD_REP_NUM];
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
} ZSTD_entropyDTables_t; } ZSTD_entropyDTables_t;
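/* Illustrative note, not part of this change: assuming HUF_DTABLE_SIZE(log) expands to (1 + (1 << log)),
 * ZSTD_HUFFDTABLE_CAPACITY_LOG == 12 keeps hufTable at 1 + 4096 HUF_DTable entries (~16 KiB), the same
 * capacity the previous HufLog-based definition provided. */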
@ -164,6 +166,7 @@ struct ZSTD_DCtx_s
ZSTD_dictUses_e dictUses; ZSTD_dictUses_e dictUses;
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */ ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
int disableHufAsm;
/* streaming */ /* streaming */
ZSTD_dStreamStage streamStage; ZSTD_dStreamStage streamStage;

View File

@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the

View File

@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -24,9 +24,6 @@ EXPORT_SYMBOL_GPL(HUF_readStats_wksp);
EXPORT_SYMBOL_GPL(ZSTD_isError); EXPORT_SYMBOL_GPL(ZSTD_isError);
EXPORT_SYMBOL_GPL(ZSTD_getErrorName); EXPORT_SYMBOL_GPL(ZSTD_getErrorName);
EXPORT_SYMBOL_GPL(ZSTD_getErrorCode); EXPORT_SYMBOL_GPL(ZSTD_getErrorCode);
EXPORT_SYMBOL_GPL(ZSTD_customMalloc);
EXPORT_SYMBOL_GPL(ZSTD_customCalloc);
EXPORT_SYMBOL_GPL(ZSTD_customFree);
MODULE_LICENSE("Dual BSD/GPL"); MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("Zstd Common"); MODULE_DESCRIPTION("Zstd Common");

View File

@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the

View File

@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -113,7 +113,7 @@ EXPORT_SYMBOL(zstd_init_dstream);
size_t zstd_reset_dstream(zstd_dstream *dstream) size_t zstd_reset_dstream(zstd_dstream *dstream)
{ {
return ZSTD_resetDStream(dstream); return ZSTD_DCtx_reset(dstream, ZSTD_reset_session_only);
} }
EXPORT_SYMBOL(zstd_reset_dstream); EXPORT_SYMBOL(zstd_reset_dstream);
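/* Illustrative note, not part of this change: upstream zstd documents ZSTD_resetDStream() as equivalent
 * to ZSTD_DCtx_reset(zds, ZSTD_reset_session_only), which aborts any frame in progress and clears session
 * state while keeping parameters and any referenced dictionary, so callers that reuse a dstream across
 * frames should observe identical behavior after this substitution. */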