zstd: import upstream v1.5.5

Import upstream zstd v1.5.5 to expose upstream's QAT integration.

Import from upstream commit 58b3ef79 [0]. This is one commit before the
tag v1.5.5-kernel [1], which is signed with upstream's signing key. The
next patch in the series imports from v1.5.5-kernel. It is kept as a
separate patch, rather than folded into a single direct import of
v1.5.5-kernel, because it is a non-trivial change that improves the
kernel's decompression speed. This commit contains 3 backported patches
on top of v1.5.5: two from the Linux copy of zstd, and one from
upstream's `dev` branch.

In addition to keeping the kernel's copy of zstd up to date, this update
was requested by Intel to expose upstream zstd's external match provider
API to the kernel, which allows QAT to accelerate the LZ match finding
stage.

This commit was generated by:

  export ZSTD=/path/to/repo/zstd/
  export LINUX=/path/to/repo/linux/
  cd "$ZSTD/contrib/linux-kernel"
  git checkout v1.5.5-kernel~
  make import LINUX="$LINUX"

I tested and benchmarked this commit on x86-64 with gcc-13.2.1 on an
Intel i9-9900K by running my benchmark scripts that benchmark zstd's
performance in btrfs and squashfs compressed filesystems. This commit
improves compression speed, especially for higher compression levels,
and regresses decompression speed. But the decompression speed
regression is addressed by the next patch in the series.

Component,	Level,	C. time delta,	size delta,	D. time delta
Btrfs    ,	    1,	        -1.9%,	     +0.0%,	        +9.5%
Btrfs    ,	    3,	        -5.6%,	     +0.0%,	        +7.4%
Btrfs    ,	    5,	        -4.9%,	     +0.0%,	        +5.0%
Btrfs    ,	    7,	        -5.7%,	     +0.0%,	        +5.2%
Btrfs    ,	    9,	        -5.7%,	     +0.0%,	        +4.0%
Squashfs ,	    1,	          N/A,	      0.0%,	       +11.6%

I also boot tested with a zstd compressed kernel on i386 and aarch64.

Link: https://github.com/facebook/zstd/commit/58b3ef79eb [0]
Link: https://github.com/facebook/zstd/tree/v1.5.5-kernel [1]
Signed-off-by: Nick Terrell <terrelln@fb.com>
This commit is contained in:
Nick Terrell 2023-11-16 12:52:21 -08:00
parent 77618db346
commit 98988fc8e9
58 changed files with 4750 additions and 2596 deletions

View File

@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -17,8 +18,17 @@
/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ /* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */
#define ZSTDERRORLIB_VISIBILITY #define ZSTDERRORLIB_VISIBLE
#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
#ifndef ZSTDERRORLIB_HIDDEN
# if (__GNUC__ >= 4) && !defined(__MINGW32__)
# define ZSTDERRORLIB_HIDDEN __attribute__ ((visibility ("hidden")))
# else
# define ZSTDERRORLIB_HIDDEN
# endif
#endif
#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBLE
/*-********************************************* /*-*********************************************
* Error codes list * Error codes list
@ -43,14 +53,17 @@ typedef enum {
ZSTD_error_frameParameter_windowTooLarge = 16, ZSTD_error_frameParameter_windowTooLarge = 16,
ZSTD_error_corruption_detected = 20, ZSTD_error_corruption_detected = 20,
ZSTD_error_checksum_wrong = 22, ZSTD_error_checksum_wrong = 22,
ZSTD_error_literals_headerWrong = 24,
ZSTD_error_dictionary_corrupted = 30, ZSTD_error_dictionary_corrupted = 30,
ZSTD_error_dictionary_wrong = 32, ZSTD_error_dictionary_wrong = 32,
ZSTD_error_dictionaryCreation_failed = 34, ZSTD_error_dictionaryCreation_failed = 34,
ZSTD_error_parameter_unsupported = 40, ZSTD_error_parameter_unsupported = 40,
ZSTD_error_parameter_combination_unsupported = 41,
ZSTD_error_parameter_outOfBound = 42, ZSTD_error_parameter_outOfBound = 42,
ZSTD_error_tableLog_tooLarge = 44, ZSTD_error_tableLog_tooLarge = 44,
ZSTD_error_maxSymbolValue_tooLarge = 46, ZSTD_error_maxSymbolValue_tooLarge = 46,
ZSTD_error_maxSymbolValue_tooSmall = 48, ZSTD_error_maxSymbolValue_tooSmall = 48,
ZSTD_error_stabilityCondition_notRespected = 50,
ZSTD_error_stage_wrong = 60, ZSTD_error_stage_wrong = 60,
ZSTD_error_init_missing = 62, ZSTD_error_init_missing = 62,
ZSTD_error_memory_allocation = 64, ZSTD_error_memory_allocation = 64,
@ -58,11 +71,15 @@ typedef enum {
ZSTD_error_dstSize_tooSmall = 70, ZSTD_error_dstSize_tooSmall = 70,
ZSTD_error_srcSize_wrong = 72, ZSTD_error_srcSize_wrong = 72,
ZSTD_error_dstBuffer_null = 74, ZSTD_error_dstBuffer_null = 74,
ZSTD_error_noForwardProgress_destFull = 80,
ZSTD_error_noForwardProgress_inputEmpty = 82,
/* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
ZSTD_error_frameIndex_tooLarge = 100, ZSTD_error_frameIndex_tooLarge = 100,
ZSTD_error_seekableIO = 102, ZSTD_error_seekableIO = 102,
ZSTD_error_dstBuffer_wrong = 104, ZSTD_error_dstBuffer_wrong = 104,
ZSTD_error_srcBuffer_wrong = 105, ZSTD_error_srcBuffer_wrong = 105,
ZSTD_error_sequenceProducer_failed = 106,
ZSTD_error_externalSequences_invalid = 107,
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
} ZSTD_ErrorCode; } ZSTD_ErrorCode;

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause # SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
# ################################################################ # ################################################################
# Copyright (c) Facebook, Inc. # Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved. # All rights reserved.
# #
# This source code is licensed under both the BSD-style license (found in the # This source code is licensed under both the BSD-style license (found in the

View File

@ -0,0 +1,56 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* This file provides custom allocation primitives
*/
#define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
#include "mem.h" /* MEM_STATIC */
#define ZSTD_STATIC_LINKING_ONLY
#include <linux/zstd.h> /* ZSTD_customMem */
#ifndef ZSTD_ALLOCATIONS_H
#define ZSTD_ALLOCATIONS_H
/* custom memory allocation functions */
/* ZSTD_customMalloc() :
 * Allocates `size` bytes.
 * When customMem.customAlloc is set, it is invoked with customMem.opaque;
 * otherwise the request is forwarded to ZSTD_malloc().
 * @return : the pointer produced by whichever allocator was selected. */
MEM_STATIC void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
{
    return customMem.customAlloc
         ? customMem.customAlloc(customMem.opaque, size)
         : ZSTD_malloc(size);
}
/* ZSTD_customCalloc() :
 * Allocates `size` zero-initialized bytes.
 * When customMem.customAlloc is set, it is invoked with customMem.opaque and
 * the returned buffer is cleared here; otherwise the request is forwarded to
 * ZSTD_calloc().
 * @return : the allocated buffer, or NULL if the custom allocator returned NULL
 *           (on the ZSTD_calloc() path, whatever ZSTD_calloc() returns). */
MEM_STATIC void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
{
    if (customMem.customAlloc) {
        /* calloc implemented as malloc+memset;
         * not as efficient as calloc, but next best guess for custom malloc */
        void* const ptr = customMem.customAlloc(customMem.opaque, size);
        /* the custom allocator may fail : never write through a NULL pointer */
        if (ptr != NULL)
            ZSTD_memset(ptr, 0, size);
        return ptr;
    }
    return ZSTD_calloc(1, size);
}
/* ZSTD_customFree() :
 * Releases `ptr`. A NULL `ptr` is a no-op.
 * When customMem.customFree is set, it is invoked with customMem.opaque;
 * otherwise `ptr` is handed to ZSTD_free(). */
MEM_STATIC void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
{
    if (ptr == NULL) return;   /* nothing to release */

    if (customMem.customFree) {
        customMem.customFree(customMem.opaque, ptr);
    } else {
        ZSTD_free(ptr);
    }
}
#endif /* ZSTD_ALLOCATIONS_H */

149
lib/zstd/common/bits.h Normal file
View File

@ -0,0 +1,149 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_BITS_H
#define ZSTD_BITS_H
#include "mem.h"
/* ZSTD_countTrailingZeros32_fallback() :
 * Software count of trailing zero bits in `val`, using a multiply-and-lookup
 * table (DeBruijnBytePos).
 * @val must be non-zero (asserted).
 * @return : index (0-31) of the lowest set bit of `val`. */
MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
{
    assert(val != 0);
    {
        static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
                                                30, 22, 20, 15, 25, 17, 4, 8,
                                                31, 27, 13, 23, 21, 19, 16, 7,
                                                26, 12, 18, 6, 11, 5, 10, 9};
        /* Isolate the lowest set bit. The negation is done in unsigned
         * arithmetic : negating a signed copy overflows when val == 0x80000000. */
        U32 const lowestBit = val & (0U - val);
        return DeBruijnBytePos[((U32) (lowestBit * 0x077CB531U)) >> 27];
    }
}
/* ZSTD_countTrailingZeros32() :
 * Number of trailing zero bits in `val`.
 * Uses the compiler builtin when (__GNUC__ >= 4), the software fallback otherwise.
 * @val must be non-zero (asserted). */
MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
{
    assert(val != 0);
    {
#   if (__GNUC__ >= 4)
        unsigned const nbZeros = (unsigned)__builtin_ctz(val);
#   else
        unsigned const nbZeros = ZSTD_countTrailingZeros32_fallback(val);
#   endif
        return nbZeros;
    }
}
/* ZSTD_countLeadingZeros32_fallback() :
 * Software count of leading zero bits in `val`.
 * Every bit below the highest set bit is set first, then a multiply-and-lookup
 * table (DeBruijnClz) gives that bit's index, which is subtracted from 31.
 * @val must be non-zero (asserted). */
MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val)
{
    static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
                                        11, 14, 16, 18, 22, 25, 3, 30,
                                        8, 12, 20, 28, 15, 17, 24, 7,
                                        19, 27, 23, 6, 26, 5, 4, 31};
    U32 smeared = val;
    assert(val != 0);
    /* propagate the highest set bit into all lower positions */
    smeared |= smeared >> 1;
    smeared |= smeared >> 2;
    smeared |= smeared >> 4;
    smeared |= smeared >> 8;
    smeared |= smeared >> 16;
    return 31 - DeBruijnClz[(smeared * 0x07C4ACDDU) >> 27];
}
/* ZSTD_countLeadingZeros32() :
 * Number of leading zero bits in `val`.
 * Uses the compiler builtin when (__GNUC__ >= 4), the software fallback otherwise.
 * @val must be non-zero (asserted). */
MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
{
    assert(val != 0);
    {
#   if (__GNUC__ >= 4)
        unsigned const nbZeros = (unsigned)__builtin_clz(val);
#   else
        unsigned const nbZeros = ZSTD_countLeadingZeros32_fallback(val);
#   endif
        return nbZeros;
    }
}
/* ZSTD_countTrailingZeros64() :
 * Number of trailing zero bits in the 64-bit `val`.
 * Uses the 64-bit builtin when (__GNUC__ >= 4) and __LP64__ is defined;
 * otherwise the count is derived from the two 32-bit halves.
 * @val must be non-zero (asserted). */
MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
{
    assert(val != 0);
#   if (__GNUC__ >= 4) && defined(__LP64__)
    return (unsigned)__builtin_ctzll(val);
#   else
    {
        U32 const lowHalf = (U32)val;
        if (lowHalf != 0) {
            /* lowest set bit lives in the low word */
            return ZSTD_countTrailingZeros32(lowHalf);
        }
        /* low word is all zeros : 32 zeros plus those of the high word */
        return 32 + ZSTD_countTrailingZeros32((U32)(val >> 32));
    }
#   endif
}
/* ZSTD_countLeadingZeros64() :
 * Number of leading zero bits in the 64-bit `val`.
 * Uses the 64-bit builtin when (__GNUC__ >= 4);
 * otherwise the count is derived from the two 32-bit halves.
 * @val must be non-zero (asserted). */
MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
{
    assert(val != 0);
#   if (__GNUC__ >= 4)
    return (unsigned)(__builtin_clzll(val));
#   else
    {
        U32 const highHalf = (U32)(val >> 32);
        if (highHalf != 0) {
            /* highest set bit lives in the high word */
            return ZSTD_countLeadingZeros32(highHalf);
        }
        /* high word is all zeros : 32 zeros plus those of the low word */
        return 32 + ZSTD_countLeadingZeros32((U32)val);
    }
#   endif
}
/* ZSTD_NbCommonBytes() :
 * Converts a zero-bit count of `val` into a byte count (bit count >> 3).
 * Trailing zeros are counted when MEM_isLittleEndian() is true, leading zeros
 * otherwise; the 64-bit or 32-bit counter is selected by MEM_64bits(). */
MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
{
    unsigned nbZeroBits;
    if (MEM_isLittleEndian()) {
        nbZeroBits = MEM_64bits() ? ZSTD_countTrailingZeros64((U64)val)
                                  : ZSTD_countTrailingZeros32((U32)val);
    } else {   /* Big Endian CPU */
        nbZeroBits = MEM_64bits() ? ZSTD_countLeadingZeros64((U64)val)
                                  : ZSTD_countLeadingZeros32((U32)val);
    }
    return nbZeroBits >> 3;
}
/* ZSTD_highbit32() :
 * Index (0-31) of the highest set bit of `val`.
 * @val must be non-zero (asserted).
 * Users : compress, dictBuilder, decodeCorpus */
MEM_STATIC unsigned ZSTD_highbit32(U32 val)
{
    assert(val != 0);
    {   unsigned const nbLeadingZeros = ZSTD_countLeadingZeros32(val);
        return 31 - nbLeadingZeros;
    }
}
/* ZSTD_rotateRight_*():
* Rotates a bitfield to the right by "count" bits.
* https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
*/
/* Rotates the 64-bit `value` right by `count` bits; count must be < 64 (asserted). */
MEM_STATIC
U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
    assert(count < 64);
    count &= 0x3F; /* for fickle pattern recognition */
    {   U64 const shiftedDown = value >> count;
        U64 const wrappedUp   = (U64)(value << ((0U - count) & 0x3F));
        return shiftedDown | wrappedUp;
    }
}
/* Rotates the 32-bit `value` right by `count` bits; count must be < 32 (asserted). */
MEM_STATIC
U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
    assert(count < 32);
    count &= 0x1F; /* for fickle pattern recognition */
    {   U32 const shiftedDown = value >> count;
        U32 const wrappedUp   = (U32)(value << ((0U - count) & 0x1F));
        return shiftedDown | wrappedUp;
    }
}
/* Rotates the 16-bit `value` right by `count` bits; count must be < 16 (asserted). */
MEM_STATIC
U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
    assert(count < 16);
    count &= 0x0F; /* for fickle pattern recognition */
    {   U16 const wrappedUp = (U16)(value << ((0U - count) & 0x0F));
        /* the result is truncated to 16 bits by the return type */
        return (value >> count) | wrappedUp;
    }
}
#endif /* ZSTD_BITS_H */

View File

@ -1,7 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* ****************************************************************** /* ******************************************************************
* bitstream * bitstream
* Part of FSE library * Part of FSE library
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* *
* You can contact the author at : * You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -27,6 +28,7 @@
#include "compiler.h" /* UNLIKELY() */ #include "compiler.h" /* UNLIKELY() */
#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
#include "error_private.h" /* error codes and messages */ #include "error_private.h" /* error codes and messages */
#include "bits.h" /* ZSTD_highbit32 */
/*========================================= /*=========================================
@ -122,33 +124,6 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
/* faster, but works only if nbBits >= 1 */ /* faster, but works only if nbBits >= 1 */
/*-**************************************************************
* Internal functions
****************************************************************/
MEM_STATIC unsigned BIT_highbit32 (U32 val)
{
assert(val != 0);
{
# if (__GNUC__ >= 3) /* Use GCC Intrinsic */
return __builtin_clz (val) ^ 31;
# else /* Software version */
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
11, 14, 16, 18, 22, 25, 3, 30,
8, 12, 20, 28, 15, 17, 24, 7,
19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
# endif
}
}
/*===== Local Constants =====*/ /*===== Local Constants =====*/
static const unsigned BIT_mask[] = { static const unsigned BIT_mask[] = {
0, 1, 3, 7, 0xF, 0x1F, 0, 1, 3, 7, 0xF, 0x1F,
@ -178,6 +153,12 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
return 0; return 0;
} }
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
{
assert(nbBits < BIT_MASK_SIZE);
return bitContainer & BIT_mask[nbBits];
}
/*! BIT_addBits() : /*! BIT_addBits() :
* can add up to 31 bits into `bitC`. * can add up to 31 bits into `bitC`.
* Note : does not check for register overflow ! */ * Note : does not check for register overflow ! */
@ -187,7 +168,7 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32); DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
assert(nbBits < BIT_MASK_SIZE); assert(nbBits < BIT_MASK_SIZE);
assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
bitC->bitPos += nbBits; bitC->bitPos += nbBits;
} }
@ -266,7 +247,7 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
bitD->bitContainer = MEM_readLEST(bitD->ptr); bitD->bitContainer = MEM_readLEST(bitD->ptr);
{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
} else { } else {
bitD->ptr = bitD->start; bitD->ptr = bitD->start;
@ -294,7 +275,7 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
default: break; default: break;
} }
{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
} }
bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
@ -325,12 +306,6 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c
#endif #endif
} }
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
{
assert(nbBits < BIT_MASK_SIZE);
return bitContainer & BIT_mask[nbBits];
}
/*! BIT_lookBits() : /*! BIT_lookBits() :
* Provides next n bits from local register. * Provides next n bits from local register.
* local register is not modified. * local register is not modified.
@ -377,7 +352,7 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned n
} }
/*! BIT_readBitsFast() : /*! BIT_readBitsFast() :
* unsafe version; only works only if nbBits >= 1 */ * unsafe version; only works if nbBits >= 1 */
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
{ {
size_t const value = BIT_lookBitsFast(bitD, nbBits); size_t const value = BIT_lookBitsFast(bitD, nbBits);
@ -408,7 +383,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
* This function is safe, it guarantees it will not read beyond src buffer. * This function is safe, it guarantees it will not read beyond src buffer.
* @return : status of `BIT_DStream_t` internal register. * @return : status of `BIT_DStream_t` internal register.
* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) MEM_STATIC FORCE_INLINE_ATTR BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
{ {
if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
return BIT_DStream_overflow; return BIT_DStream_overflow;

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -179,6 +180,17 @@
* Sanitizer * Sanitizer
*****************************************************************/ *****************************************************************/
/* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an
* abundance of caution, disable our custom poisoning on mingw. */
#ifdef __MINGW32__
#ifndef ZSTD_ASAN_DONT_POISON_WORKSPACE
#define ZSTD_ASAN_DONT_POISON_WORKSPACE 1
#endif
#ifndef ZSTD_MSAN_DONT_POISON_WORKSPACE
#define ZSTD_MSAN_DONT_POISON_WORKSPACE 1
#endif
#endif
#endif /* ZSTD_COMPILER_H */ #endif /* ZSTD_COMPILER_H */

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the

View File

@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* ****************************************************************** /* ******************************************************************
* debug * debug
* Part of FSE library * Part of FSE library
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* *
* You can contact the author at : * You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy

View File

@ -1,7 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* ****************************************************************** /* ******************************************************************
* debug * debug
* Part of FSE library * Part of FSE library
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* *
* You can contact the author at : * You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy

View File

@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* ****************************************************************** /* ******************************************************************
* Common functions of New Generation Entropy library * Common functions of New Generation Entropy library
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* *
* You can contact the author at : * You can contact the author at :
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -19,8 +20,8 @@
#include "error_private.h" /* ERR_*, ERROR */ #include "error_private.h" /* ERR_*, ERROR */
#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ #define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
#include "fse.h" #include "fse.h"
#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
#include "huf.h" #include "huf.h"
#include "bits.h" /* ZSTD_highbit32, ZSTD_countTrailingZeros32 */
/*=== Version ===*/ /*=== Version ===*/
@ -38,23 +39,6 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
/*-************************************************************** /*-**************************************************************
* FSE NCount encoding-decoding * FSE NCount encoding-decoding
****************************************************************/ ****************************************************************/
static U32 FSE_ctz(U32 val)
{
assert(val != 0);
{
# if (__GNUC__ >= 3) /* GCC Intrinsic */
return __builtin_ctz(val);
# else /* Software version */
U32 count = 0;
while ((val & 1) == 0) {
val >>= 1;
++count;
}
return count;
# endif
}
}
FORCE_INLINE_TEMPLATE FORCE_INLINE_TEMPLATE
size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize) const void* headerBuffer, size_t hbSize)
@ -102,7 +86,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
* repeat. * repeat.
* Avoid UB by setting the high bit to 1. * Avoid UB by setting the high bit to 1.
*/ */
int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; int repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
while (repeats >= 12) { while (repeats >= 12) {
charnum += 3 * 12; charnum += 3 * 12;
if (LIKELY(ip <= iend-7)) { if (LIKELY(ip <= iend-7)) {
@ -113,7 +97,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
ip = iend - 4; ip = iend - 4;
} }
bitStream = MEM_readLE32(ip) >> bitCount; bitStream = MEM_readLE32(ip) >> bitCount;
repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
} }
charnum += 3 * repeats; charnum += 3 * repeats;
bitStream >>= 2 * repeats; bitStream >>= 2 * repeats;
@ -178,7 +162,7 @@ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigne
* know that threshold > 1. * know that threshold > 1.
*/ */
if (remaining <= 1) break; if (remaining <= 1) break;
nbBits = BIT_highbit32(remaining) + 1; nbBits = ZSTD_highbit32(remaining) + 1;
threshold = 1 << (nbBits - 1); threshold = 1 << (nbBits - 1);
} }
if (charnum >= maxSV1) break; if (charnum >= maxSV1) break;
@ -253,7 +237,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
const void* src, size_t srcSize) const void* src, size_t srcSize)
{ {
U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0); return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* flags */ 0);
} }
FORCE_INLINE_TEMPLATE size_t FORCE_INLINE_TEMPLATE size_t
@ -301,14 +285,14 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
if (weightTotal == 0) return ERROR(corruption_detected); if (weightTotal == 0) return ERROR(corruption_detected);
/* get last non-null symbol weight (implied, total must be 2^n) */ /* get last non-null symbol weight (implied, total must be 2^n) */
{ U32 const tableLog = BIT_highbit32(weightTotal) + 1; { U32 const tableLog = ZSTD_highbit32(weightTotal) + 1;
if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
*tableLogPtr = tableLog; *tableLogPtr = tableLog;
/* determine last weight */ /* determine last weight */
{ U32 const total = 1 << tableLog; { U32 const total = 1 << tableLog;
U32 const rest = total - weightTotal; U32 const rest = total - weightTotal;
U32 const verif = 1 << BIT_highbit32(rest); U32 const verif = 1 << ZSTD_highbit32(rest);
U32 const lastWeight = BIT_highbit32(rest) + 1; U32 const lastWeight = ZSTD_highbit32(rest) + 1;
if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
huffWeight[oSize] = (BYTE)lastWeight; huffWeight[oSize] = (BYTE)lastWeight;
rankStats[lastWeight]++; rankStats[lastWeight]++;
@ -345,13 +329,13 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr, U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize, const void* src, size_t srcSize,
void* workSpace, size_t wkspSize, void* workSpace, size_t wkspSize,
int bmi2) int flags)
{ {
#if DYNAMIC_BMI2 #if DYNAMIC_BMI2
if (bmi2) { if (flags & HUF_flags_bmi2) {
return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
} }
#endif #endif
(void)bmi2; (void)flags;
return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
} }

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -27,9 +28,11 @@ const char* ERR_getErrorString(ERR_enum code)
case PREFIX(version_unsupported): return "Version not supported"; case PREFIX(version_unsupported): return "Version not supported";
case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
case PREFIX(corruption_detected): return "Corrupted block detected"; case PREFIX(corruption_detected): return "Data corruption detected";
case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification";
case PREFIX(parameter_unsupported): return "Unsupported parameter"; case PREFIX(parameter_unsupported): return "Unsupported parameter";
case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters";
case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
case PREFIX(init_missing): return "Context should be init first"; case PREFIX(init_missing): return "Context should be init first";
case PREFIX(memory_allocation): return "Allocation error : not enough memory"; case PREFIX(memory_allocation): return "Allocation error : not enough memory";
@ -38,17 +41,22 @@ const char* ERR_getErrorString(ERR_enum code)
case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
case PREFIX(stabilityCondition_notRespected): return "pledged buffer stability condition is not respected";
case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
case PREFIX(dictionary_wrong): return "Dictionary mismatch"; case PREFIX(dictionary_wrong): return "Dictionary mismatch";
case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
case PREFIX(srcSize_wrong): return "Src size is incorrect"; case PREFIX(srcSize_wrong): return "Src size is incorrect";
case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer"; case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer";
case PREFIX(noForwardProgress_destFull): return "Operation made no progress over multiple calls, due to output buffer being full";
case PREFIX(noForwardProgress_inputEmpty): return "Operation made no progress over multiple calls, due to input being empty";
/* following error codes are not stable and may be removed or changed in a future version */ /* following error codes are not stable and may be removed or changed in a future version */
case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
case PREFIX(sequenceProducer_failed): return "Block-level external sequence producer returned an error code";
case PREFIX(externalSequences_invalid): return "External sequences are not valid";
case PREFIX(maxCode): case PREFIX(maxCode):
default: return notErrorCode; default: return notErrorCode;
} }

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the

View File

@ -1,7 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* ****************************************************************** /* ******************************************************************
* FSE : Finite State Entropy codec * FSE : Finite State Entropy codec
* Public Prototypes declaration * Public Prototypes declaration
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* *
* You can contact the author at : * You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -50,34 +51,6 @@
FSE_PUBLIC_API unsigned FSE_versionNumber(void); /*< library version number; to be used when checking dll version */ FSE_PUBLIC_API unsigned FSE_versionNumber(void); /*< library version number; to be used when checking dll version */
/*-****************************************
* FSE simple functions
******************************************/
/*! FSE_compress() :
Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
@return : size of compressed data (<= dstCapacity).
Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
if FSE_isError(return), compression failed (more details using FSE_getErrorName())
*/
FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
/*! FSE_decompress():
Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
into already allocated destination buffer 'dst', of size 'dstCapacity'.
@return : size of regenerated data (<= maxDstSize),
or an error code, which can be tested using FSE_isError() .
** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
Why ? : making this distinction requires a header.
Header management is intentionally delegated to the user layer, which can better manage special cases.
*/
FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity,
const void* cSrc, size_t cSrcSize);
/*-***************************************** /*-*****************************************
* Tool functions * Tool functions
******************************************/ ******************************************/
@ -88,20 +61,6 @@ FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return
FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
/*-*****************************************
* FSE advanced functions
******************************************/
/*! FSE_compress2() :
Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
Both parameters can be defined as '0' to mean : use default value
@return : size of compressed data
Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
if FSE_isError(return), it's an error code.
*/
FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
/*-***************************************** /*-*****************************************
* FSE detailed API * FSE detailed API
******************************************/ ******************************************/
@ -161,8 +120,6 @@ FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
/*! Constructor and Destructor of FSE_CTable. /*! Constructor and Destructor of FSE_CTable.
Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct);
/*! FSE_buildCTable(): /*! FSE_buildCTable():
Builds `ct`, which must be already allocated, using FSE_createCTable(). Builds `ct`, which must be already allocated, using FSE_createCTable().
@ -238,23 +195,7 @@ FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
const void* rBuffer, size_t rBuffSize, int bmi2); const void* rBuffer, size_t rBuffSize, int bmi2);
/*! Constructor and Destructor of FSE_DTable.
Note that its size depends on 'tableLog' */
typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt);
/*! FSE_buildDTable():
Builds 'dt', which must be already allocated, using FSE_createDTable().
return : 0, or an errorCode, which can be tested using FSE_isError() */
FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
/*! FSE_decompress_usingDTable():
Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
into `dst` which must be already allocated.
@return : size of regenerated data (necessarily <= `dstCapacity`),
or an errorCode, which can be tested using FSE_isError() */
FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
/*! /*!
Tutorial : Tutorial :
@ -317,16 +258,6 @@ If there is an error, the function will return an error code, which can be teste
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
/*< same as FSE_optimalTableLog(), which used `minus==2` */ /*< same as FSE_optimalTableLog(), which used `minus==2` */
/* FSE_compress_wksp() :
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
* FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
*/
#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
/*< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
/*< build a fake FSE_CTable, designed to compress always the same symbolValue */ /*< build a fake FSE_CTable, designed to compress always the same symbolValue */
@ -344,19 +275,11 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
/*< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */ /*< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */
size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits); #define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
/*< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
/*< build a fake FSE_DTable, designed to always generate the same symbolValue */
#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned)) #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
/*< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2); size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
/*< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */ /*< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`.
* Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */
typedef enum { typedef enum {
FSE_repeat_none, /*< Cannot use the previous table */ FSE_repeat_none, /*< Cannot use the previous table */
@ -552,7 +475,7 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt
/* FSE_getMaxNbBits() : /* FSE_getMaxNbBits() :
* Approximate maximum cost of a symbol, in bits. * Approximate maximum cost of a symbol, in bits.
* Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) * Fractional get rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
* note 1 : assume symbolValue is valid (<= maxSymbolValue) * note 1 : assume symbolValue is valid (<= maxSymbolValue)
* note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)

View File

@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* ****************************************************************** /* ******************************************************************
* FSE : Finite State Entropy decoder * FSE : Finite State Entropy decoder
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* *
* You can contact the author at : * You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -24,6 +25,7 @@
#include "error_private.h" #include "error_private.h"
#define ZSTD_DEPS_NEED_MALLOC #define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h" #include "zstd_deps.h"
#include "bits.h" /* ZSTD_highbit32 */
/* ************************************************************** /* **************************************************************
@ -55,19 +57,6 @@
#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
/* Function templates */
FSE_DTable* FSE_createDTable (unsigned tableLog)
{
if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
}
void FSE_freeDTable (FSE_DTable* dt)
{
ZSTD_free(dt);
}
static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
{ {
void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
@ -127,10 +116,10 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
} }
} }
/* Now we spread those positions across the table. /* Now we spread those positions across the table.
* The benefit of doing it in two stages is that we avoid the the * The benefit of doing it in two stages is that we avoid the
* variable size inner loop, which caused lots of branch misses. * variable size inner loop, which caused lots of branch misses.
* Now we can run through all the positions without any branch misses. * Now we can run through all the positions without any branch misses.
* We unroll the loop twice, since that is what emperically worked best. * We unroll the loop twice, since that is what empirically worked best.
*/ */
{ {
size_t position = 0; size_t position = 0;
@ -166,7 +155,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
for (u=0; u<tableSize; u++) { for (u=0; u<tableSize; u++) {
FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol); FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
U32 const nextState = symbolNext[symbol]++; U32 const nextState = symbolNext[symbol]++;
tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) ); tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize); tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
} } } }
@ -184,49 +173,6 @@ size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsi
/*-******************************************************* /*-*******************************************************
* Decompression (Byte symbols) * Decompression (Byte symbols)
*********************************************************/ *********************************************************/
size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
{
void* ptr = dt;
FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
void* dPtr = dt + 1;
FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
DTableH->tableLog = 0;
DTableH->fastMode = 0;
cell->newState = 0;
cell->symbol = symbolValue;
cell->nbBits = 0;
return 0;
}
size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
{
void* ptr = dt;
FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
void* dPtr = dt + 1;
FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
const unsigned tableSize = 1 << nbBits;
const unsigned tableMask = tableSize - 1;
const unsigned maxSV1 = tableMask+1;
unsigned s;
/* Sanity checks */
if (nbBits < 1) return ERROR(GENERIC); /* min size */
/* Build Decoding Table */
DTableH->tableLog = (U16)nbBits;
DTableH->fastMode = 1;
for (s=0; s<maxSV1; s++) {
dinfo[s].newState = 0;
dinfo[s].symbol = (BYTE)s;
dinfo[s].nbBits = (BYTE)nbBits;
}
return 0;
}
FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic( FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
void* dst, size_t maxDstSize, void* dst, size_t maxDstSize,
@ -290,26 +236,6 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
return op-ostart; return op-ostart;
} }
size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
const void* cSrc, size_t cSrcSize,
const FSE_DTable* dt)
{
const void* ptr = dt;
const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
const U32 fastMode = DTableH->fastMode;
/* select fast mode (static) */
if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
}
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
{
return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
}
typedef struct { typedef struct {
short ncount[FSE_MAX_SYMBOL_VALUE + 1]; short ncount[FSE_MAX_SYMBOL_VALUE + 1];
FSE_DTable dtable[]; /* Dynamically sized */ FSE_DTable dtable[]; /* Dynamically sized */
@ -342,7 +268,8 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
} }
if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge); if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog); assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize);
workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) ); CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
@ -382,9 +309,4 @@ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc,
return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
} }
typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
#endif /* FSE_COMMONDEFS_ONLY */ #endif /* FSE_COMMONDEFS_ONLY */

View File

@ -1,7 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* ****************************************************************** /* ******************************************************************
* huff0 huffman codec, * huff0 huffman codec,
* part of Finite State Entropy library * part of Finite State Entropy library
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* *
* You can contact the author at : * You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -18,100 +19,23 @@
/* *** Dependencies *** */ /* *** Dependencies *** */
#include "zstd_deps.h" /* size_t */ #include "zstd_deps.h" /* size_t */
#include "mem.h" /* U32 */
/* *** library symbols visibility *** */
/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
* HUF symbols remain "private" (internal symbols for library only).
* Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
# define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */
# define HUF_PUBLIC_API __declspec(dllexport)
#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
#else
# define HUF_PUBLIC_API
#endif
/* ========================== */
/* *** simple functions *** */
/* ========================== */
/* HUF_compress() :
* Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
* 'dst' buffer must be already allocated.
* Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
* `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
* @return : size of compressed data (<= `dstCapacity`).
* Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
* if HUF_isError(return), compression failed (more details using HUF_getErrorName())
*/
HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
/* HUF_decompress() :
* Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
* into already allocated buffer 'dst', of minimum size 'dstSize'.
* `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
* Note : in contrast with FSE, HUF_decompress can regenerate
* RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
* because it knows size to regenerate (originalSize).
* @return : size of regenerated data (== originalSize),
* or an error code, which can be tested using HUF_isError()
*/
HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize,
const void* cSrc, size_t cSrcSize);
/* *** Tool functions *** */
#define HUF_BLOCKSIZE_MAX (128 * 1024) /*< maximum input size for a single block compressed with HUF_compress */
HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /*< maximum compressed size (worst case) */
/* Error Management */
HUF_PUBLIC_API unsigned HUF_isError(size_t code); /*< tells if a return value is an error code */
HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /*< provides error code string (useful for debugging) */
/* *** Advanced function *** */
/* HUF_compress2() :
* Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
* `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
* `tableLog` must be `<= HUF_TABLELOG_MAX` . */
HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog);
/* HUF_compress4X_wksp() :
* Same as HUF_compress2(), but uses externally allocated `workSpace`.
* `workspace` must be at least as large as HUF_WORKSPACE_SIZE */
#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize);
#endif /* HUF_H_298734234 */
/* ******************************************************************
* WARNING !!
* The following section contains advanced and experimental definitions
* which shall never be used in the context of a dynamic library,
* because they are not guaranteed to remain stable in the future.
* Only consider them in association with static linking.
* *****************************************************************/
#if !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
#define HUF_H_HUF_STATIC_LINKING_ONLY
/* *** Dependencies *** */
#include "mem.h" /* U32 */
#define FSE_STATIC_LINKING_ONLY #define FSE_STATIC_LINKING_ONLY
#include "fse.h" #include "fse.h"
/* *** Tool functions *** */
#define HUF_BLOCKSIZE_MAX (128 * 1024) /*< maximum input size for a single block compressed with HUF_compress */
size_t HUF_compressBound(size_t size); /*< maximum compressed size (worst case) */
/* Error Management */
unsigned HUF_isError(size_t code); /*< tells if a return value is an error code */
const char* HUF_getErrorName(size_t code); /*< provides error code string (useful for debugging) */
#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
/* *** Constants *** */ /* *** Constants *** */
#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */ #define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ #define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */
@ -151,25 +75,49 @@ typedef U32 HUF_DTable;
/* **************************************** /* ****************************************
* Advanced decompression functions * Advanced decompression functions
******************************************/ ******************************************/
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */
#endif
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< decodes RLE and uncompressed */ /*
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< considers RLE and uncompressed as errors */ * Huffman flags bitset.
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< considers RLE and uncompressed as errors */ * For all flags, 0 is the default value.
size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */ */
size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< single-symbol decoder */ typedef enum {
#ifndef HUF_FORCE_DECOMPRESS_X1 /*
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */ * If compiled with DYNAMIC_BMI2: Set flag only if the CPU supports BMI2 at runtime.
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< double-symbols decoder */ * Otherwise: Ignored.
#endif */
HUF_flags_bmi2 = (1 << 0),
/*
* If set: Test possible table depths to find the one that produces the smallest header + encoded size.
* If unset: Use heuristic to find the table depth.
*/
HUF_flags_optimalDepth = (1 << 1),
/*
* If set: If the previous table can encode the input, always reuse the previous table.
* If unset: If the previous table can encode the input, reuse the previous table if it results in a smaller output.
*/
HUF_flags_preferRepeat = (1 << 2),
/*
* If set: Sample the input and check if the sample is uncompressible, if it is then don't attempt to compress.
* If unset: Always histogram the entire input.
*/
HUF_flags_suspectUncompressible = (1 << 3),
/*
* If set: Don't use assembly implementations
* If unset: Allow using assembly implementations
*/
HUF_flags_disableAsm = (1 << 4),
/*
* If set: Don't use the fast decoding loop, always use the fallback decoding loop.
* If unset: Use the fast decoding loop when possible.
*/
HUF_flags_disableFast = (1 << 5)
} HUF_flags_e;
/* **************************************** /* ****************************************
* HUF detailed API * HUF detailed API
* ****************************************/ * ****************************************/
#define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra
/*! HUF_compress() does the following: /*! HUF_compress() does the following:
* 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
@ -182,12 +130,12 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
* For example, it's possible to compress several blocks using the same 'CTable', * For example, it's possible to compress several blocks using the same 'CTable',
* or to save and regenerate 'CTable' using external methods. * or to save and regenerate 'CTable' using external methods.
*/ */
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); unsigned HUF_minTableLog(unsigned symbolCardinality);
size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue);
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace,
size_t wkspSize, HUF_CElt* table, const unsigned* count, int flags); /* table is used as scratch space for building and testing tables, not a return value */
size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize); size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags);
size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
@ -196,6 +144,7 @@ typedef enum {
HUF_repeat_check, /*< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ HUF_repeat_check, /*< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
HUF_repeat_valid /*< Can use the previous table and it is assumed to be valid */ HUF_repeat_valid /*< Can use the previous table and it is assumed to be valid */
} HUF_repeat; } HUF_repeat;
/* HUF_compress4X_repeat() : /* HUF_compress4X_repeat() :
* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
* If it uses hufTable it does not modify hufTable or repeat. * If it uses hufTable it does not modify hufTable or repeat.
@ -206,13 +155,13 @@ size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
const void* src, size_t srcSize, const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog, unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible); HUF_CElt* hufTable, HUF_repeat* repeat, int flags);
/* HUF_buildCTable_wksp() : /* HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer. * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
* `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
*/ */
#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) #define HUF_CTABLE_WORKSPACE_SIZE_U32 ((4 * (HUF_SYMBOLVALUE_MAX + 1)) + 192)
#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) #define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
size_t HUF_buildCTable_wksp (HUF_CElt* tree, size_t HUF_buildCTable_wksp (HUF_CElt* tree,
const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
@ -238,7 +187,7 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize, const void* src, size_t srcSize,
void* workspace, size_t wkspSize, void* workspace, size_t wkspSize,
int bmi2); int flags);
/* HUF_readCTable() : /* HUF_readCTable() :
* Loading a CTable saved with HUF_writeCTable() */ * Loading a CTable saved with HUF_writeCTable() */
@ -276,32 +225,12 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9)) #define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))
#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
#endif
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#endif
/* ====================== */ /* ====================== */
/* single stream variants */ /* single stream variants */
/* ====================== */ /* ====================== */
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags);
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
/* HUF_compress1X_repeat() : /* HUF_compress1X_repeat() :
* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
* If it uses hufTable it does not modify hufTable or repeat. * If it uses hufTable it does not modify hufTable or repeat.
@ -312,47 +241,28 @@ size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
const void* src, size_t srcSize, const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned tableLog, unsigned maxSymbolValue, unsigned tableLog,
void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible); HUF_CElt* hufTable, HUF_repeat* repeat, int flags);
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
#ifndef HUF_FORCE_DECOMPRESS_X1 #ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); /*< double-symbols decoder */
#endif
size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< single-symbol decoder */
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< double-symbols decoder */
#endif
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /*< automatic selection of sing or double symbol decoder, based on DTable */
#ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#endif
#ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
#endif #endif
/* BMI2 variants. /* BMI2 variants.
* If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
*/ */
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);
#ifndef HUF_FORCE_DECOMPRESS_X2 #ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
#endif #endif
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags);
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags);
#ifndef HUF_FORCE_DECOMPRESS_X2 #ifndef HUF_FORCE_DECOMPRESS_X2
size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);
#endif #endif
#ifndef HUF_FORCE_DECOMPRESS_X1 #ifndef HUF_FORCE_DECOMPRESS_X1
size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags);
#endif #endif
#endif /* HUF_STATIC_LINKING_ONLY */ #endif /* HUF_H_298734234 */

View File

@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -12,7 +13,7 @@
#define ZSTD_PORTABILITY_MACROS_H #define ZSTD_PORTABILITY_MACROS_H
/* /*
* This header file contains macro defintions to support portability. * This header file contains macro definitions to support portability.
* This header is shared between C and ASM code, so it MUST only * This header is shared between C and ASM code, so it MUST only
* contain macro definitions. It MUST not contain any C code. * contain macro definitions. It MUST not contain any C code.
* *
@ -65,7 +66,7 @@
#endif #endif
/* /*
* Only enable assembly for GNUC comptabile compilers, * Only enable assembly for GNUC compatible compilers,
* because other platforms may not support GAS assembly syntax. * because other platforms may not support GAS assembly syntax.
* *
* Only enable assembly for Linux / MacOS, other platforms may * Only enable assembly for Linux / MacOS, other platforms may
@ -90,4 +91,23 @@
*/ */
#define ZSTD_ENABLE_ASM_X86_64_BMI2 0 #define ZSTD_ENABLE_ASM_X86_64_BMI2 0
/*
* For x86 ELF targets, add .note.gnu.property section for Intel CET in
* assembly sources when CET is enabled.
*
* Additionally, any function that may be called indirectly must begin
* with ZSTD_CET_ENDBRANCH.
*/
#if defined(__ELF__) && (defined(__x86_64__) || defined(__i386__)) \
&& defined(__has_include)
# if __has_include(<cet.h>)
# include <cet.h>
# define ZSTD_CET_ENDBRANCH _CET_ENDBR
# endif
#endif
#ifndef ZSTD_CET_ENDBRANCH
# define ZSTD_CET_ENDBRANCH
#endif
#endif /* ZSTD_PORTABILITY_MACROS_H */ #endif /* ZSTD_PORTABILITY_MACROS_H */

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -14,7 +15,6 @@
* Dependencies * Dependencies
***************************************/ ***************************************/
#define ZSTD_DEPS_NEED_MALLOC #define ZSTD_DEPS_NEED_MALLOC
#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
#include "error_private.h" #include "error_private.h"
#include "zstd_internal.h" #include "zstd_internal.h"
@ -47,37 +47,3 @@ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
/*! ZSTD_getErrorString() : /*! ZSTD_getErrorString() :
* provides error code string from enum */ * provides error code string from enum */
const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
/*=**************************************************************
* Custom allocator
****************************************************************/
/* ZSTD_customMalloc() :
 * Allocates `size` bytes, going through customMem.customAlloc (called with
 * customMem.opaque) when that callback is set, and through ZSTD_malloc()
 * otherwise. The allocator's result is returned as-is. */
void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
{
    return customMem.customAlloc
         ? customMem.customAlloc(customMem.opaque, size)
         : ZSTD_malloc(size);
}
/* ZSTD_customCalloc() :
 * Allocates `size` zero-initialized bytes.
 * When customMem.customAlloc is set, the buffer comes from that callback
 * (called with customMem.opaque) and is cleared here; otherwise the request
 * is forwarded to ZSTD_calloc(1, size).
 * @return : the allocator's result; NULL from the custom allocator is passed
 *           through untouched. */
void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
{
    if (customMem.customAlloc) {
        /* calloc implemented as malloc+memset;
         * not as efficient as calloc, but next best guess for custom malloc */
        void* const ptr = customMem.customAlloc(customMem.opaque, size);
        /* A failed custom allocation must not be written to :
         * clearing through a NULL pointer is undefined behavior. */
        if (ptr != NULL)
            ZSTD_memset(ptr, 0, size);
        return ptr;
    }
    return ZSTD_calloc(1, size);
}
/* ZSTD_customFree() :
 * Releases `ptr`. A NULL `ptr` is ignored. Otherwise the buffer is handed to
 * customMem.customFree (called with customMem.opaque) when that callback is
 * set, and to ZSTD_free() when it is not. */
void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
{
    if (ptr == NULL)
        return;

    if (customMem.customFree) {
        customMem.customFree(customMem.opaque, ptr);
        return;
    }

    ZSTD_free(ptr);
}

View File

@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -105,3 +105,17 @@ static uint64_t ZSTD_div64(uint64_t dividend, uint32_t divisor) {
#endif /* ZSTD_DEPS_IO */ #endif /* ZSTD_DEPS_IO */
#endif /* ZSTD_DEPS_NEED_IO */ #endif /* ZSTD_DEPS_NEED_IO */
/*
* Only requested when MSAN is enabled.
* Need:
* intptr_t
*/
#ifdef ZSTD_DEPS_NEED_STDINT
#ifndef ZSTD_DEPS_STDINT
#define ZSTD_DEPS_STDINT
/* intptr_t already provided by ZSTD_DEPS_COMMON */
#endif /* ZSTD_DEPS_STDINT */
#endif /* ZSTD_DEPS_NEED_STDINT */

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -28,7 +29,6 @@
#include <linux/zstd.h> #include <linux/zstd.h>
#define FSE_STATIC_LINKING_ONLY #define FSE_STATIC_LINKING_ONLY
#include "fse.h" #include "fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "huf.h" #include "huf.h"
#include <linux/xxhash.h> /* XXH_reset, update, digest */ #include <linux/xxhash.h> /* XXH_reset, update, digest */
#define ZSTD_TRACE 0 #define ZSTD_TRACE 0
@ -83,9 +83,9 @@ typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
#define ZSTD_FRAMECHECKSUMSIZE 4 #define ZSTD_FRAMECHECKSUMSIZE 4
#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */
#define MIN_LITERALS_FOR_4_STREAMS 6
#define HufLog 12
typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
#define LONGNBSEQ 0x7F00 #define LONGNBSEQ 0x7F00
@ -93,6 +93,7 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
#define MINMATCH 3 #define MINMATCH 3
#define Litbits 8 #define Litbits 8
#define LitHufLog 11
#define MaxLit ((1<<Litbits) - 1) #define MaxLit ((1<<Litbits) - 1)
#define MaxML 52 #define MaxML 52
#define MaxLL 35 #define MaxLL 35
@ -103,6 +104,8 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
#define LLFSELog 9 #define LLFSELog 9
#define OffFSELog 8 #define OffFSELog 8
#define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog) #define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
#define MaxMLBits 16
#define MaxLLBits 16
#define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */ #define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */
/* Each table cannot take more than #symbols * FSELog bits */ /* Each table cannot take more than #symbols * FSELog bits */
@ -225,12 +228,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
* one COPY16() in the first call. Then, do two calls per loop since * one COPY16() in the first call. Then, do two calls per loop since
* at that point it is more likely to have a high trip count. * at that point it is more likely to have a high trip count.
*/ */
#ifdef __aarch64__
do {
COPY16(op, ip);
}
while (op < oend);
#else
ZSTD_copy16(op, ip); ZSTD_copy16(op, ip);
if (16 >= length) return; if (16 >= length) return;
op += 16; op += 16;
@ -240,7 +237,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
COPY16(op, ip); COPY16(op, ip);
} }
while (op < oend); while (op < oend);
#endif
} }
} }
@ -289,11 +285,11 @@ typedef enum {
typedef struct { typedef struct {
seqDef* sequencesStart; seqDef* sequencesStart;
seqDef* sequences; /* ptr to end of sequences */ seqDef* sequences; /* ptr to end of sequences */
BYTE* litStart; BYTE* litStart;
BYTE* lit; /* ptr to end of literals */ BYTE* lit; /* ptr to end of literals */
BYTE* llCode; BYTE* llCode;
BYTE* mlCode; BYTE* mlCode;
BYTE* ofCode; BYTE* ofCode;
size_t maxNbSeq; size_t maxNbSeq;
size_t maxNbLit; size_t maxNbLit;
@ -301,8 +297,8 @@ typedef struct {
* in the seqStore that has a value larger than U16 (if it exists). To do so, we increment * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
* the existing value of the litLength or matchLength by 0x10000. * the existing value of the litLength or matchLength by 0x10000.
*/ */
ZSTD_longLengthType_e longLengthType; ZSTD_longLengthType_e longLengthType;
U32 longLengthPos; /* Index of the sequence to apply long length modification to */ U32 longLengthPos; /* Index of the sequence to apply long length modification to */
} seqStore_t; } seqStore_t;
typedef struct { typedef struct {
@ -321,10 +317,10 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore
seqLen.matchLength = seq->mlBase + MINMATCH; seqLen.matchLength = seq->mlBase + MINMATCH;
if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
if (seqStore->longLengthType == ZSTD_llt_literalLength) { if (seqStore->longLengthType == ZSTD_llt_literalLength) {
seqLen.litLength += 0xFFFF; seqLen.litLength += 0x10000;
} }
if (seqStore->longLengthType == ZSTD_llt_matchLength) { if (seqStore->longLengthType == ZSTD_llt_matchLength) {
seqLen.matchLength += 0xFFFF; seqLen.matchLength += 0x10000;
} }
} }
return seqLen; return seqLen;
@ -337,72 +333,13 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore
* `decompressedBound != ZSTD_CONTENTSIZE_ERROR` * `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
*/ */
typedef struct { typedef struct {
size_t nbBlocks;
size_t compressedSize; size_t compressedSize;
unsigned long long decompressedBound; unsigned long long decompressedBound;
} ZSTD_frameSizeInfo; /* decompress & legacy */ } ZSTD_frameSizeInfo; /* decompress & legacy */
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ int ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
/* custom memory allocation functions */
void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem);
void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem);
void ZSTD_customFree(void* ptr, ZSTD_customMem customMem);
/* ZSTD_highbit32() :
 * Returns the zero-based position of the most significant set bit of `val`.
 * `val` must be non-zero; this is only enforced through assert().
 * Two implementations are selected at compile time : a compiler builtin when
 * __GNUC__ >= 3, and a portable table lookup otherwise. */
MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
{
assert(val != 0);
{
# if (__GNUC__ >= 3) /* GCC Intrinsic */
/* the leading-zero count lies in [0, 31], so `^ 31` equals `31 - count` */
return __builtin_clz (val) ^ 31;
# else /* Software version */
static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
/* propagate the highest set bit into every lower bit position */
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
/* the top 5 bits of the product select the table entry */
return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
# endif
}
}
/*
 * Counts the number of trailing zeros of a `size_t`.
 * Most compilers should support CTZ as a builtin. A backup
 * implementation is provided if the builtin isn't supported, but
 * it may not be terribly efficient.
 *
 * The width handled (64 or 32 bits) is chosen by MEM_64bits().
 * NOTE(review): `val` is not checked against 0 here, and the GCC builtins
 * have an undefined result for a zero argument -- callers presumably
 * guarantee val != 0; confirm against call sites.
 */
MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val)
{
if (MEM_64bits()) {
# if (__GNUC__ >= 4)
return __builtin_ctzll((U64)val);
# else
static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19,
4, 25, 14, 28, 9, 34, 20, 56,
5, 17, 26, 54, 15, 41, 29, 43,
10, 31, 38, 35, 21, 45, 49, 57,
63, 6, 12, 18, 24, 27, 33, 55,
16, 53, 40, 42, 30, 37, 44, 48,
62, 11, 23, 32, 52, 39, 36, 47,
61, 22, 51, 46, 60, 50, 59, 58 };
/* `val & -val` keeps only the lowest set bit; the top 6 bits of the
 * product then index the table */
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
# endif
} else { /* 32 bits */
# if (__GNUC__ >= 3)
return __builtin_ctz((U32)val);
# else
static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3,
30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7,
26, 12, 18, 6, 11, 5, 10, 9 };
/* same scheme as the 64-bit fallback, with a 5-bit table index */
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
# endif
}
}
/* ZSTD_invalidateRepCodes() : /* ZSTD_invalidateRepCodes() :

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the

View File

@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* ****************************************************************** /* ******************************************************************
* FSE : Finite State Entropy encoder * FSE : Finite State Entropy encoder
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* *
* You can contact the author at : * You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -26,6 +27,7 @@
#define ZSTD_DEPS_NEED_MALLOC #define ZSTD_DEPS_NEED_MALLOC
#define ZSTD_DEPS_NEED_MATH64 #define ZSTD_DEPS_NEED_MATH64
#include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */ #include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
#include "../common/bits.h" /* ZSTD_highbit32 */
/* ************************************************************** /* **************************************************************
@ -90,7 +92,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
assert(tableLog < 16); /* required for threshold strategy to work */ assert(tableLog < 16); /* required for threshold strategy to work */
/* For explanations on how to distribute symbol values over the table : /* For explanations on how to distribute symbol values over the table :
* http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ * https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
#ifdef __clang_analyzer__ #ifdef __clang_analyzer__
ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
@ -191,7 +193,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
break; break;
default : default :
assert(normalizedCounter[s] > 1); assert(normalizedCounter[s] > 1);
{ U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1); { U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1);
U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut; U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]); symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
@ -342,21 +344,11 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize,
* FSE Compression Code * FSE Compression Code
****************************************************************/ ****************************************************************/
FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
{
size_t size;
if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
return (FSE_CTable*)ZSTD_malloc(size);
}
/* FSE_freeCTable() :
 * Hands `ct` to ZSTD_free(). */
void FSE_freeCTable (FSE_CTable* ct)
{
    ZSTD_free(ct);
}
/* provides the minimum logSize to safely represent a distribution */ /* provides the minimum logSize to safely represent a distribution */
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
{ {
U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1; U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1;
U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2;
U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
assert(srcSize > 1); /* Not supported, RLE should be used instead */ assert(srcSize > 1); /* Not supported, RLE should be used instead */
return minBits; return minBits;
@ -364,7 +356,7 @@ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
{ {
U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus;
U32 tableLog = maxTableLog; U32 tableLog = maxTableLog;
U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
assert(srcSize > 1); /* Not supported, RLE should be used instead */ assert(srcSize > 1); /* Not supported, RLE should be used instead */
@ -532,40 +524,6 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
return tableLog; return tableLog;
} }
/* fake FSE_CTable, for raw (uncompressed) input
 * Fills `ct` with a table of (1 << nbBits) states in which every symbol
 * receives the same deltaNbBits value.
 * @return : 0 on success, ERROR(GENERIC) when nbBits < 1.
 * NOTE(review): the capacity of `ct` is not checked here; the caller
 * presumably sizes it for `nbBits` -- confirm. */
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
{
const unsigned tableSize = 1 << nbBits;
const unsigned tableMask = tableSize - 1;
const unsigned maxSymbolValue = tableMask;
void* const ptr = ct;
/* skip two U16 so that indices -2 and -1 address the first 4 bytes of `ct` */
U16* const tableU16 = ( (U16*) ptr) + 2;
void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */
FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
unsigned s;
/* Sanity checks */
if (nbBits < 1) return ERROR(GENERIC); /* min size */
/* header */
tableU16[-2] = (U16) nbBits;
tableU16[-1] = (U16) maxSymbolValue;
/* Build table */
/* entry s holds tableSize + s */
for (s=0; s<tableSize; s++)
tableU16[s] = (U16)(tableSize + s);
/* Build Symbol Transformation Table */
{ const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
for (s=0; s<=maxSymbolValue; s++) {
symbolTT[s].deltaNbBits = deltaNbBits;
symbolTT[s].deltaFindState = s-1;
} }
return 0;
}
/* fake FSE_CTable, for rle input (always same symbol) */ /* fake FSE_CTable, for rle input (always same symbol) */
size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue) size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
{ {
@ -664,5 +622,4 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
#endif /* FSE_COMMONDEFS_ONLY */ #endif /* FSE_COMMONDEFS_ONLY */

View File

@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* ****************************************************************** /* ******************************************************************
* hist : Histogram functions * hist : Histogram functions
* part of Finite State Entropy project * part of Finite State Entropy project
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* *
* You can contact the author at : * You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy

View File

@ -1,7 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* ****************************************************************** /* ******************************************************************
* hist : Histogram functions * hist : Histogram functions
* part of Finite State Entropy project * part of Finite State Entropy project
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* *
* You can contact the author at : * You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy

View File

@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* ****************************************************************** /* ******************************************************************
* Huffman encoder, part of New Generation Entropy library * Huffman encoder, part of New Generation Entropy library
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* *
* You can contact the author at : * You can contact the author at :
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@ -26,9 +27,9 @@
#include "hist.h" #include "hist.h"
#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
#include "../common/fse.h" /* header compression */ #include "../common/fse.h" /* header compression */
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h" #include "../common/huf.h"
#include "../common/error_private.h" #include "../common/error_private.h"
#include "../common/bits.h" /* ZSTD_highbit32 */
/* ************************************************************** /* **************************************************************
@ -39,13 +40,67 @@
/* ************************************************************** /* **************************************************************
* Utils * Required declarations
****************************************************************/ ****************************************************************/
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) typedef struct nodeElt_s {
U32 count;
U16 parent;
BYTE byte;
BYTE nbBits;
} nodeElt;
/* **************************************************************
* Debug Traces
****************************************************************/
#if DEBUGLEVEL >= 2
static size_t showU32(const U32* arr, size_t size)
{ {
return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); size_t u;
for (u=0; u<size; u++) {
RAWLOG(6, " %u", arr[u]); (void)arr;
}
RAWLOG(6, " \n");
return size;
} }
static size_t HUF_getNbBits(HUF_CElt elt);
/* showCTableBits() :
 * Debug trace helper : emits, at RAWLOG level 6, the value returned by
 * HUF_getNbBits() for each of the first @size entries of @ctable,
 * then terminates the trace line.
 * @return : @size, unchanged, which lets the call be used directly
 *           as a DEBUGLOG() argument. */
static size_t showCTableBits(const HUF_CElt* ctable, size_t size)
{
    size_t pos = 0;
    /* presumably keeps @ctable "used" if RAWLOG() drops its arguments */
    (void)ctable;
    while (pos < size) {
        RAWLOG(6, " %zu", HUF_getNbBits(ctable[pos]));
        pos++;
    }
    RAWLOG(6, " \n");
    return size;
}
/* showHNodeSymbols() :
 * Debug trace helper : emits, at RAWLOG level 6, the `byte` field of each
 * of the first @size nodes of @hnode, then terminates the trace line.
 * @return : @size, unchanged, which lets the call be used directly
 *           as a DEBUGLOG() argument. */
static size_t showHNodeSymbols(const nodeElt* hnode, size_t size)
{
    size_t pos = 0;
    /* presumably keeps @hnode "used" if RAWLOG() drops its arguments */
    (void)hnode;
    while (pos < size) {
        RAWLOG(6, " %u", hnode[pos].byte);
        pos++;
    }
    RAWLOG(6, " \n");
    return size;
}
/* showHNodeBits() :
 * Debug trace helper : emits, at RAWLOG level 6, the `nbBits` field of each
 * of the first @size nodes of @hnode, then terminates the trace line.
 * @return : @size, unchanged, which lets the call be used directly
 *           as a DEBUGLOG() argument. */
static size_t showHNodeBits(const nodeElt* hnode, size_t size)
{
    size_t pos = 0;
    /* presumably keeps @hnode "used" if RAWLOG() drops its arguments */
    (void)hnode;
    while (pos < size) {
        RAWLOG(6, " %u", hnode[pos].nbBits);
        pos++;
    }
    RAWLOG(6, " \n");
    return size;
}
#endif
/* ******************************************************* /* *******************************************************
* HUF : Huffman block compression * HUF : Huffman block compression
@ -86,7 +141,10 @@ typedef struct {
S16 norm[HUF_TABLELOG_MAX+1]; S16 norm[HUF_TABLELOG_MAX+1];
} HUF_CompressWeightsWksp; } HUF_CompressWeightsWksp;
static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize) static size_t
HUF_compressWeights(void* dst, size_t dstSize,
const void* weightTable, size_t wtSize,
void* workspace, size_t workspaceSize)
{ {
BYTE* const ostart = (BYTE*) dst; BYTE* const ostart = (BYTE*) dst;
BYTE* op = ostart; BYTE* op = ostart;
@ -137,7 +195,7 @@ static size_t HUF_getNbBitsFast(HUF_CElt elt)
static size_t HUF_getValue(HUF_CElt elt) static size_t HUF_getValue(HUF_CElt elt)
{ {
return elt & ~0xFF; return elt & ~(size_t)0xFF;
} }
static size_t HUF_getValueFast(HUF_CElt elt) static size_t HUF_getValueFast(HUF_CElt elt)
@ -175,6 +233,8 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
U32 n; U32 n;
HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32)); HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));
/* check conditions */ /* check conditions */
if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC); if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
@ -204,16 +264,6 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
return ((maxSymbolValue+1)/2) + 1; return ((maxSymbolValue+1)/2) + 1;
} }
/*! HUF_writeCTable() :
`CTable` : Huffman tree to save, using huf representation.
@return : size of saved CTable */
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
{
HUF_WriteCTableWksp wksp;
return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
}
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights) size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{ {
@ -269,68 +319,64 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue) U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
{ {
const HUF_CElt* ct = CTable + 1; const HUF_CElt* const ct = CTable + 1;
assert(symbolValue <= HUF_SYMBOLVALUE_MAX); assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
return (U32)HUF_getNbBits(ct[symbolValue]); return (U32)HUF_getNbBits(ct[symbolValue]);
} }
typedef struct nodeElt_s {
U32 count;
U16 parent;
BYTE byte;
BYTE nbBits;
} nodeElt;
/* /*
* HUF_setMaxHeight(): * HUF_setMaxHeight():
* Enforces maxNbBits on the Huffman tree described in huffNode. * Try to enforce @targetNbBits on the Huffman tree described in @huffNode.
* *
* It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts * It attempts to convert all nodes with nbBits > @targetNbBits
* the tree to so that it is a valid canonical Huffman tree. * to employ @targetNbBits instead. Then it adjusts the tree
* so that it remains a valid canonical Huffman tree.
* *
* @pre The sum of the ranks of each symbol == 2^largestBits, * @pre The sum of the ranks of each symbol == 2^largestBits,
* where largestBits == huffNode[lastNonNull].nbBits. * where largestBits == huffNode[lastNonNull].nbBits.
* @post The sum of the ranks of each symbol == 2^largestBits, * @post The sum of the ranks of each symbol == 2^largestBits,
* where largestBits is the return value <= maxNbBits. * where largestBits is the return value (expected <= targetNbBits).
* *
* @param huffNode The Huffman tree modified in place to enforce maxNbBits. * @param huffNode The Huffman tree modified in place to enforce targetNbBits.
* It's presumed sorted, from most frequent to rarest symbol.
* @param lastNonNull The symbol with the lowest count in the Huffman tree. * @param lastNonNull The symbol with the lowest count in the Huffman tree.
* @param maxNbBits The maximum allowed number of bits, which the Huffman tree * @param targetNbBits The allowed number of bits, which the Huffman tree
* may not respect. After this function the Huffman tree will * may not respect. After this function the Huffman tree will
* respect maxNbBits. * respect targetNbBits.
* @return The maximum number of bits of the Huffman tree after adjustment, * @return The maximum number of bits of the Huffman tree after adjustment.
* necessarily no more than maxNbBits.
*/ */
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits)
{ {
const U32 largestBits = huffNode[lastNonNull].nbBits; const U32 largestBits = huffNode[lastNonNull].nbBits;
/* early exit : no elt > maxNbBits, so the tree is already valid. */ /* early exit : no elt > targetNbBits, so the tree is already valid. */
if (largestBits <= maxNbBits) return largestBits; if (largestBits <= targetNbBits) return largestBits;
DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits);
/* there are several too large elements (at least >= 2) */ /* there are several too large elements (at least >= 2) */
{ int totalCost = 0; { int totalCost = 0;
const U32 baseCost = 1 << (largestBits - maxNbBits); const U32 baseCost = 1 << (largestBits - targetNbBits);
int n = (int)lastNonNull; int n = (int)lastNonNull;
/* Adjust any ranks > maxNbBits to maxNbBits. /* Adjust any ranks > targetNbBits to targetNbBits.
* Compute totalCost, which is how far the sum of the ranks is * Compute totalCost, which is how far the sum of the ranks is
* we are over 2^largestBits after adjust the offending ranks. * we are over 2^largestBits after adjust the offending ranks.
*/ */
while (huffNode[n].nbBits > maxNbBits) { while (huffNode[n].nbBits > targetNbBits) {
totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
huffNode[n].nbBits = (BYTE)maxNbBits; huffNode[n].nbBits = (BYTE)targetNbBits;
n--; n--;
} }
/* n stops at huffNode[n].nbBits <= maxNbBits */ /* n stops at huffNode[n].nbBits <= targetNbBits */
assert(huffNode[n].nbBits <= maxNbBits); assert(huffNode[n].nbBits <= targetNbBits);
/* n end at index of smallest symbol using < maxNbBits */ /* n end at index of smallest symbol using < targetNbBits */
while (huffNode[n].nbBits == maxNbBits) --n; while (huffNode[n].nbBits == targetNbBits) --n;
/* renorm totalCost from 2^largestBits to 2^maxNbBits /* renorm totalCost from 2^largestBits to 2^targetNbBits
* note : totalCost is necessarily a multiple of baseCost */ * note : totalCost is necessarily a multiple of baseCost */
assert((totalCost & (baseCost - 1)) == 0); assert(((U32)totalCost & (baseCost - 1)) == 0);
totalCost >>= (largestBits - maxNbBits); totalCost >>= (largestBits - targetNbBits);
assert(totalCost > 0); assert(totalCost > 0);
/* repay normalized cost */ /* repay normalized cost */
@ -339,19 +385,19 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
/* Get pos of last (smallest = lowest cum. count) symbol per rank */ /* Get pos of last (smallest = lowest cum. count) symbol per rank */
ZSTD_memset(rankLast, 0xF0, sizeof(rankLast)); ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
{ U32 currentNbBits = maxNbBits; { U32 currentNbBits = targetNbBits;
int pos; int pos;
for (pos=n ; pos >= 0; pos--) { for (pos=n ; pos >= 0; pos--) {
if (huffNode[pos].nbBits >= currentNbBits) continue; if (huffNode[pos].nbBits >= currentNbBits) continue;
currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ currentNbBits = huffNode[pos].nbBits; /* < targetNbBits */
rankLast[maxNbBits-currentNbBits] = (U32)pos; rankLast[targetNbBits-currentNbBits] = (U32)pos;
} } } }
while (totalCost > 0) { while (totalCost > 0) {
/* Try to reduce the next power of 2 above totalCost because we /* Try to reduce the next power of 2 above totalCost because we
* gain back half the rank. * gain back half the rank.
*/ */
U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1; U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1;
for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
U32 const highPos = rankLast[nBitsToDecrease]; U32 const highPos = rankLast[nBitsToDecrease];
U32 const lowPos = rankLast[nBitsToDecrease-1]; U32 const lowPos = rankLast[nBitsToDecrease-1];
@ -391,7 +437,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
rankLast[nBitsToDecrease] = noSymbol; rankLast[nBitsToDecrease] = noSymbol;
else { else {
rankLast[nBitsToDecrease]--; rankLast[nBitsToDecrease]--;
if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease)
rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
} }
} /* while (totalCost > 0) */ } /* while (totalCost > 0) */
@ -403,11 +449,11 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
* TODO. * TODO.
*/ */
while (totalCost < 0) { /* Sometimes, cost correction overshoot */ while (totalCost < 0) { /* Sometimes, cost correction overshoot */
/* special case : no rank 1 symbol (using maxNbBits-1); /* special case : no rank 1 symbol (using targetNbBits-1);
* let's create one from largest rank 0 (using maxNbBits). * let's create one from largest rank 0 (using targetNbBits).
*/ */
if (rankLast[1] == noSymbol) { if (rankLast[1] == noSymbol) {
while (huffNode[n].nbBits == maxNbBits) n--; while (huffNode[n].nbBits == targetNbBits) n--;
huffNode[n+1].nbBits--; huffNode[n+1].nbBits--;
assert(n >= 0); assert(n >= 0);
rankLast[1] = (U32)(n+1); rankLast[1] = (U32)(n+1);
@ -421,7 +467,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
} /* repay normalized cost */ } /* repay normalized cost */
} /* there are several too large elements (at least >= 2) */ } /* there are several too large elements (at least >= 2) */
return maxNbBits; return targetNbBits;
} }
typedef struct { typedef struct {
@ -429,7 +475,7 @@ typedef struct {
U16 curr; U16 curr;
} rankPos; } rankPos;
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)];
/* Number of buckets available for HUF_sort() */ /* Number of buckets available for HUF_sort() */
#define RANK_POSITION_TABLE_SIZE 192 #define RANK_POSITION_TABLE_SIZE 192
@ -448,8 +494,8 @@ typedef struct {
* Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing. * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
*/ */
#define RANK_POSITION_MAX_COUNT_LOG 32 #define RANK_POSITION_MAX_COUNT_LOG 32
#define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */ #define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */ #define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)
/* Return the appropriate bucket index for a given count. See definition of /* Return the appropriate bucket index for a given count. See definition of
* RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy. * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
@ -457,7 +503,7 @@ typedef struct {
static U32 HUF_getIndex(U32 const count) { static U32 HUF_getIndex(U32 const count) {
return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF) return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
? count ? count
: BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN; : ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
} }
/* Helper swap function for HUF_quickSortPartition() */ /* Helper swap function for HUF_quickSortPartition() */
@ -580,7 +626,7 @@ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSy
/* Sort each bucket. */ /* Sort each bucket. */
for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) { for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base; int const bucketSize = rankPosition[n].curr - rankPosition[n].base;
U32 const bucketStartIdx = rankPosition[n].base; U32 const bucketStartIdx = rankPosition[n].base;
if (bucketSize > 1) { if (bucketSize > 1) {
assert(bucketStartIdx < maxSymbolValue1); assert(bucketStartIdx < maxSymbolValue1);
@ -591,6 +637,7 @@ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSy
assert(HUF_isSorted(huffNode, maxSymbolValue1)); assert(HUF_isSorted(huffNode, maxSymbolValue1));
} }
/* HUF_buildCTable_wksp() : /* HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer. * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
* `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables). * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
@ -611,6 +658,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
int lowS, lowN; int lowS, lowN;
int nodeNb = STARTNODE; int nodeNb = STARTNODE;
int n, nodeRoot; int n, nodeRoot;
DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1);
/* init for parents */ /* init for parents */
nonNullRank = (int)maxSymbolValue; nonNullRank = (int)maxSymbolValue;
while(huffNode[nonNullRank].count == 0) nonNullRank--; while(huffNode[nonNullRank].count == 0) nonNullRank--;
@ -637,6 +685,8 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
for (n=0; n<=nonNullRank; n++) for (n=0; n<=nonNullRank; n++)
huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1));
return nonNullRank; return nonNullRank;
} }
@ -674,28 +724,36 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i
CTable[0] = maxNbBits; CTable[0] = maxNbBits;
} }
size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) size_t
HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
void* workSpace, size_t wkspSize)
{ {
HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32)); HUF_buildCTable_wksp_tables* const wksp_tables =
(HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
nodeElt* const huffNode0 = wksp_tables->huffNodeTbl; nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
nodeElt* const huffNode = huffNode0+1; nodeElt* const huffNode = huffNode0+1;
int nonNullRank; int nonNullRank;
HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables));
DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1);
/* safety checks */ /* safety checks */
if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
return ERROR(workSpace_tooSmall); return ERROR(workSpace_tooSmall);
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
return ERROR(maxSymbolValue_tooLarge); return ERROR(maxSymbolValue_tooLarge);
ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable)); ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
/* sort, decreasing order */ /* sort, decreasing order */
HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition); HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1));
/* build tree */ /* build tree */
nonNullRank = HUF_buildTree(huffNode, maxSymbolValue); nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
/* enforce maxTableLog */ /* determine and enforce maxTableLog */
maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits); maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
@ -804,7 +862,7 @@ FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int id
#if DEBUGLEVEL >= 1 #if DEBUGLEVEL >= 1
{ {
size_t const nbBits = HUF_getNbBits(elt); size_t const nbBits = HUF_getNbBits(elt);
size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1; size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1;
(void)dirtyBits; (void)dirtyBits;
/* Middle bits are 0. */ /* Middle bits are 0. */
assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0); assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
@ -884,7 +942,7 @@ static size_t HUF_closeCStream(HUF_CStream_t* bitC)
{ {
size_t const nbBits = bitC->bitPos[0] & 0xFF; size_t const nbBits = bitC->bitPos[0] & 0xFF;
if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
return (bitC->ptr - bitC->startPtr) + (nbBits > 0); return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);
} }
} }
@ -1045,9 +1103,9 @@ HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
static size_t static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
const void* src, size_t srcSize, const void* src, size_t srcSize,
const HUF_CElt* CTable, const int bmi2) const HUF_CElt* CTable, const int flags)
{ {
if (bmi2) { if (flags & HUF_flags_bmi2) {
return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
} }
return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
@ -1058,28 +1116,23 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
static size_t static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
const void* src, size_t srcSize, const void* src, size_t srcSize,
const HUF_CElt* CTable, const int bmi2) const HUF_CElt* CTable, const int flags)
{ {
(void)bmi2; (void)flags;
return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
} }
#endif #endif
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
{ {
return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
}
size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
} }
static size_t static size_t
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
const void* src, size_t srcSize, const void* src, size_t srcSize,
const HUF_CElt* CTable, int bmi2) const HUF_CElt* CTable, int flags)
{ {
size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
const BYTE* ip = (const BYTE*) src; const BYTE* ip = (const BYTE*) src;
@ -1093,7 +1146,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
op += 6; /* jumpTable */ op += 6; /* jumpTable */
assert(op <= oend); assert(op <= oend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
if (cSize == 0 || cSize > 65535) return 0; if (cSize == 0 || cSize > 65535) return 0;
MEM_writeLE16(ostart, (U16)cSize); MEM_writeLE16(ostart, (U16)cSize);
op += cSize; op += cSize;
@ -1101,7 +1154,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
ip += segmentSize; ip += segmentSize;
assert(op <= oend); assert(op <= oend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
if (cSize == 0 || cSize > 65535) return 0; if (cSize == 0 || cSize > 65535) return 0;
MEM_writeLE16(ostart+2, (U16)cSize); MEM_writeLE16(ostart+2, (U16)cSize);
op += cSize; op += cSize;
@ -1109,7 +1162,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
ip += segmentSize; ip += segmentSize;
assert(op <= oend); assert(op <= oend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
if (cSize == 0 || cSize > 65535) return 0; if (cSize == 0 || cSize > 65535) return 0;
MEM_writeLE16(ostart+4, (U16)cSize); MEM_writeLE16(ostart+4, (U16)cSize);
op += cSize; op += cSize;
@ -1118,7 +1171,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
ip += segmentSize; ip += segmentSize;
assert(op <= oend); assert(op <= oend);
assert(ip <= iend); assert(ip <= iend);
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) );
if (cSize == 0 || cSize > 65535) return 0; if (cSize == 0 || cSize > 65535) return 0;
op += cSize; op += cSize;
} }
@ -1126,14 +1179,9 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
return (size_t)(op-ostart); return (size_t)(op-ostart);
} }
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
{ {
return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
}
size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
} }
typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
@ -1141,11 +1189,11 @@ typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
static size_t HUF_compressCTable_internal( static size_t HUF_compressCTable_internal(
BYTE* const ostart, BYTE* op, BYTE* const oend, BYTE* const ostart, BYTE* op, BYTE* const oend,
const void* src, size_t srcSize, const void* src, size_t srcSize,
HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags)
{ {
size_t const cSize = (nbStreams==HUF_singleStream) ? size_t const cSize = (nbStreams==HUF_singleStream) ?
HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) : HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) :
HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2); HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags);
if (HUF_isError(cSize)) { return cSize; } if (HUF_isError(cSize)) { return cSize; }
if (cSize==0) { return 0; } /* uncompressible */ if (cSize==0) { return 0; } /* uncompressible */
op += cSize; op += cSize;
@ -1168,6 +1216,79 @@ typedef struct {
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096 #define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */ #define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */
/* HUF_cardinality() :
 * Counts how many entries of @count, over indexes [0, maxSymbolValue],
 * are non-zero.
 * @count must provide at least maxSymbolValue+1 readable entries.
 * @return : number of non-zero entries. */
unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue)
{
    unsigned nbPresent = 0;
    unsigned s = 0;
    while (s < maxSymbolValue + 1) {
        /* a comparison evaluates to 0 or 1, so it can be accumulated directly */
        nbPresent += (count[s] != 0);
        s++;
    }
    return nbPresent;
}
unsigned HUF_minTableLog(unsigned symbolCardinality)
{
U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1;
return minBitsSymbols;
}
/* HUF_optimalTableLog() :
 * Selects the table log to use for building a Huffman table.
 *
 * Without HUF_flags_optimalDepth in @flags, the result is simply
 * FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1).
 *
 * With HUF_flags_optimalDepth, every candidate log from
 * HUF_minTableLog(cardinality of @count) up to @maxTableLog is probed :
 * a CTable is built into @table, its header is written into scratch space,
 * and the estimated payload size + header size is compared to the best so far.
 * The candidate giving the smallest total is returned.
 * If no candidate is retained, @maxTableLog is returned.
 *
 * @workSpace / @wkspSize : scratch area, asserted to be at least
 *          sizeof(HUF_buildCTable_wksp_tables) ; it is handed to both
 *          HUF_buildCTable_wksp() and HUF_writeCTable_wksp().
 * @table : overwritten by each probe ; its content on return is whatever
 *          the last probe built.
 * @count : symbol histogram, read over indexes [0, maxSymbolValue].
 * @return : selected table log. */
unsigned HUF_optimalTableLog(
unsigned maxTableLog,
size_t srcSize,
unsigned maxSymbolValue,
void* workSpace, size_t wkspSize,
HUF_CElt* table,
const unsigned* count,
int flags)
{
assert(srcSize > 1); /* Not supported, RLE should be used instead */
assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables));
if (!(flags & HUF_flags_optimalDepth)) {
/* cheap evaluation, based on FSE */
return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
}
/* The trial header is written past the first sizeof(HUF_WriteCTableWksp) bytes
 * of workSpace ; only its size (hSize) is used afterwards. */
{ BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
size_t maxBits, hSize, newSize;
const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
/* starts one below the maximum size_t value, so that `optSize + 1` in the loop cannot wrap */
size_t optSize = ((size_t) ~0) - 1;
unsigned optLog = maxTableLog, optLogGuess;
DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);
/* Search until size increases */
for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
/* a failed build only discards this candidate */
if (ERR_isError(maxBits)) continue;
/* the built table uses fewer bits than allowed :
 * presumably larger limits would produce the same table, so stop probing */
if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
/* a header that cannot be written only discards this candidate */
if (ERR_isError(hSize)) continue;
newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;
/* stop as soon as a candidate is more than 1 byte larger than the best so far */
if (newSize > optSize + 1) {
break;
}
/* strictly smaller total : remember this candidate */
if (newSize < optSize) {
optSize = newSize;
optLog = optLogGuess;
}
}
assert(optLog <= HUF_TABLELOG_MAX);
return optLog;
}
}
/* HUF_compress_internal() : /* HUF_compress_internal() :
* `workSpace_align4` must be aligned on 4-bytes boundaries, * `workSpace_align4` must be aligned on 4-bytes boundaries,
* and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */ * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
@ -1177,14 +1298,14 @@ HUF_compress_internal (void* dst, size_t dstSize,
unsigned maxSymbolValue, unsigned huffLog, unsigned maxSymbolValue, unsigned huffLog,
HUF_nbStreams_e nbStreams, HUF_nbStreams_e nbStreams,
void* workSpace, size_t wkspSize, void* workSpace, size_t wkspSize,
HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags)
const int bmi2, unsigned suspectUncompressible)
{ {
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t)); HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
BYTE* const ostart = (BYTE*)dst; BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize; BYTE* const oend = ostart + dstSize;
BYTE* op = ostart; BYTE* op = ostart;
DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize);
HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE); HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
/* checks & inits */ /* checks & inits */
@ -1198,16 +1319,17 @@ HUF_compress_internal (void* dst, size_t dstSize,
if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
/* Heuristic : If old table is valid, use it for small inputs */ /* Heuristic : If old table is valid, use it for small inputs */
if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) {
return HUF_compressCTable_internal(ostart, op, oend, return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize, src, srcSize,
nbStreams, oldHufTable, bmi2); nbStreams, oldHufTable, flags);
} }
/* If uncompressible data is suspected, do a smaller sampling first */ /* If uncompressible data is suspected, do a smaller sampling first */
DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2); DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) { if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
size_t largestTotal = 0; size_t largestTotal = 0;
DEBUGLOG(5, "input suspected incompressible : sampling to check");
{ unsigned maxSymbolValueBegin = maxSymbolValue; { unsigned maxSymbolValueBegin = maxSymbolValue;
CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) ); CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
largestTotal += largestBegin; largestTotal += largestBegin;
@ -1224,6 +1346,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
} }
DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1));
/* Check validity of previous table */ /* Check validity of previous table */
if ( repeat if ( repeat
@ -1232,19 +1355,20 @@ HUF_compress_internal (void* dst, size_t dstSize,
*repeat = HUF_repeat_none; *repeat = HUF_repeat_none;
} }
/* Heuristic : use existing table for small inputs */ /* Heuristic : use existing table for small inputs */
if (preferRepeat && repeat && *repeat != HUF_repeat_none) { if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) {
return HUF_compressCTable_internal(ostart, op, oend, return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize, src, srcSize,
nbStreams, oldHufTable, bmi2); nbStreams, oldHufTable, flags);
} }
/* Build Huffman Tree */ /* Build Huffman Tree */
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags);
{ size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
maxSymbolValue, huffLog, maxSymbolValue, huffLog,
&table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp)); &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
CHECK_F(maxBits); CHECK_F(maxBits);
huffLog = (U32)maxBits; huffLog = (U32)maxBits;
DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
} }
/* Zero unused symbols in CTable, so we can check it for validity */ /* Zero unused symbols in CTable, so we can check it for validity */
{ {
@ -1263,7 +1387,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
return HUF_compressCTable_internal(ostart, op, oend, return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize, src, srcSize,
nbStreams, oldHufTable, bmi2); nbStreams, oldHufTable, flags);
} } } }
/* Use the new huffman table */ /* Use the new huffman table */
@ -1275,46 +1399,20 @@ HUF_compress_internal (void* dst, size_t dstSize,
} }
return HUF_compressCTable_internal(ostart, op, oend, return HUF_compressCTable_internal(ostart, op, oend,
src, srcSize, src, srcSize,
nbStreams, table->CTable, bmi2); nbStreams, table->CTable, flags);
}
size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize)
{
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_singleStream,
workSpace, wkspSize,
NULL, NULL, 0, 0 /*bmi2*/, 0);
} }
size_t HUF_compress1X_repeat (void* dst, size_t dstSize, size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize, const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog, unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize, void* workSpace, size_t wkspSize,
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
int bmi2, unsigned suspectUncompressible)
{ {
DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize);
return HUF_compress_internal(dst, dstSize, src, srcSize, return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_singleStream, maxSymbolValue, huffLog, HUF_singleStream,
workSpace, wkspSize, hufTable, workSpace, wkspSize, hufTable,
repeat, preferRepeat, bmi2, suspectUncompressible); repeat, flags);
}
/* HUF_compress4X_repeat():
* compress input using 4 streams.
* provide workspace to generate compression tables */
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize)
{
return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_fourStreams,
workSpace, wkspSize,
NULL, NULL, 0, 0 /*bmi2*/, 0);
} }
/* HUF_compress4X_repeat(): /* HUF_compress4X_repeat():
@ -1325,11 +1423,11 @@ size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize, const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog, unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize, void* workSpace, size_t wkspSize,
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible) HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
{ {
DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize);
return HUF_compress_internal(dst, dstSize, src, srcSize, return HUF_compress_internal(dst, dstSize, src, srcSize,
maxSymbolValue, huffLog, HUF_fourStreams, maxSymbolValue, huffLog, HUF_fourStreams,
workSpace, wkspSize, workSpace, wkspSize,
hufTable, repeat, preferRepeat, bmi2, suspectUncompressible); hufTable, repeat, flags);
} }

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -20,6 +21,7 @@
***************************************/ ***************************************/
#include "../common/zstd_internal.h" #include "../common/zstd_internal.h"
#include "zstd_cwksp.h" #include "zstd_cwksp.h"
#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */
/*-************************************* /*-*************************************
@ -111,12 +113,13 @@ typedef struct {
/* ZSTD_buildBlockEntropyStats() : /* ZSTD_buildBlockEntropyStats() :
* Builds entropy for the block. * Builds entropy for the block.
* @return : 0 on success or error code */ * @return : 0 on success or error code */
size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, size_t ZSTD_buildBlockEntropyStats(
const ZSTD_entropyCTables_t* prevEntropy, const seqStore_t* seqStorePtr,
ZSTD_entropyCTables_t* nextEntropy, const ZSTD_entropyCTables_t* prevEntropy,
const ZSTD_CCtx_params* cctxParams, ZSTD_entropyCTables_t* nextEntropy,
ZSTD_entropyCTablesMetadata_t* entropyMetadata, const ZSTD_CCtx_params* cctxParams,
void* workspace, size_t wkspSize); ZSTD_entropyCTablesMetadata_t* entropyMetadata,
void* workspace, size_t wkspSize);
/* ******************************* /* *******************************
* Compression internals structs * * Compression internals structs *
@ -142,6 +145,12 @@ typedef struct {
size_t capacity; /* The capacity starting from `seq` pointer */ size_t capacity; /* The capacity starting from `seq` pointer */
} rawSeqStore_t; } rawSeqStore_t;
/* ZSTD_sequencePosition :
 * Position marker within an array of ZSTD_Sequence, made of a sequence index,
 * an offset inside that sequence, and a running count of source bytes.
 * NOTE(review): presumably carried across calls so that sequence consumption can
 * resume where the previous block stopped — confirm against the functions taking it. */
typedef struct {
U32 idx; /* Index in array of ZSTD_Sequence */
U32 posInSequence; /* Position within sequence at idx */
size_t posInSrc; /* Number of bytes given by sequences provided so far */
} ZSTD_sequencePosition;
UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
typedef struct { typedef struct {
@ -212,8 +221,10 @@ struct ZSTD_matchState_t {
U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/ U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */ BYTE* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */ U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
U64 hashSalt; /* For row-based matchFinder: salts the hash for re-use of tag table */
U32 hashSaltEntropy; /* For row-based matchFinder: collects entropy for salt generation */
U32* hashTable; U32* hashTable;
U32* hashTable3; U32* hashTable3;
@ -228,6 +239,18 @@ struct ZSTD_matchState_t {
const ZSTD_matchState_t* dictMatchState; const ZSTD_matchState_t* dictMatchState;
ZSTD_compressionParameters cParams; ZSTD_compressionParameters cParams;
const rawSeqStore_t* ldmSeqStore; const rawSeqStore_t* ldmSeqStore;
/* Controls prefetching in some dictMatchState matchfinders.
* This behavior is controlled from the cctx ms.
* This parameter has no effect in the cdict ms. */
int prefetchCDictTables;
/* When == 0, lazy match finders insert every position.
* When != 0, lazy match finders only insert positions they search.
* This allows them to skip much faster over incompressible data,
* at a small cost to compression ratio.
*/
int lazySkipping;
}; };
typedef struct { typedef struct {
@ -324,6 +347,24 @@ struct ZSTD_CCtx_params_s {
/* Internal use, for createCCtxParams() and freeCCtxParams() only */ /* Internal use, for createCCtxParams() and freeCCtxParams() only */
ZSTD_customMem customMem; ZSTD_customMem customMem;
/* Controls prefetching in some dictMatchState matchfinders */
ZSTD_paramSwitch_e prefetchCDictTables;
/* Controls whether zstd will fall back to an internal matchfinder
* if the external matchfinder returns an error code. */
int enableMatchFinderFallback;
/* Indicates whether an external matchfinder has been referenced.
* Users can't set this externally.
* It is set internally in ZSTD_registerSequenceProducer(). */
int useSequenceProducer;
/* Adjust the max block size */
size_t maxBlockSize;
/* Controls repcode search in external sequence parsing */
ZSTD_paramSwitch_e searchForExternalRepcodes;
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2)) #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
@ -355,6 +396,14 @@ typedef struct {
ZSTD_entropyCTablesMetadata_t entropyMetadata; ZSTD_entropyCTablesMetadata_t entropyMetadata;
} ZSTD_blockSplitCtx; } ZSTD_blockSplitCtx;
/* Context for block-level external matchfinder API.
 * Groups a sequence-producer callback with an opaque state pointer and a
 * ZSTD_Sequence buffer plus its capacity.
 * NOTE(review): field roles below are read off the types and names only — confirm
 * against the code that fills and consumes this struct. */
typedef struct {
void* mState; /* opaque pointer; presumably the state handed to mFinder — TODO confirm */
ZSTD_sequenceProducer_F* mFinder; /* pointer to the sequence-producer function */
ZSTD_Sequence* seqBuffer; /* buffer of ZSTD_Sequence; presumably receives mFinder's output — TODO confirm */
size_t seqBufferCapacity; /* capacity of seqBuffer; presumably counted in ZSTD_Sequence elements — TODO confirm */
} ZSTD_externalMatchCtx;
struct ZSTD_CCtx_s { struct ZSTD_CCtx_s {
ZSTD_compressionStage_e stage; ZSTD_compressionStage_e stage;
int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */ int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
@ -404,6 +453,7 @@ struct ZSTD_CCtx_s {
/* Stable in/out buffer verification */ /* Stable in/out buffer verification */
ZSTD_inBuffer expectedInBuffer; ZSTD_inBuffer expectedInBuffer;
size_t stableIn_notConsumed; /* nb bytes within stable input buffer that are said to be consumed but are not */
size_t expectedOutBufferSize; size_t expectedOutBufferSize;
/* Dictionary */ /* Dictionary */
@ -417,9 +467,13 @@ struct ZSTD_CCtx_s {
/* Workspace for block splitter */ /* Workspace for block splitter */
ZSTD_blockSplitCtx blockSplitCtx; ZSTD_blockSplitCtx blockSplitCtx;
/* Workspace for external matchfinder */
ZSTD_externalMatchCtx externalMatchCtx;
}; };
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
/* ZSTD_tableFillPurpose_e :
 * NOTE(review): judging by the identifiers only, this tags whether a table is being
 * filled on behalf of a CCtx or of a CDict — confirm at the users of this enum. */
typedef enum { ZSTD_tfp_forCCtx, ZSTD_tfp_forCDict } ZSTD_tableFillPurpose_e;
typedef enum { typedef enum {
ZSTD_noDict = 0, ZSTD_noDict = 0,
@ -441,7 +495,7 @@ typedef enum {
* In this mode we take both the source size and the dictionary size * In this mode we take both the source size and the dictionary size
* into account when selecting and adjusting the parameters. * into account when selecting and adjusting the parameters.
*/ */
ZSTD_cpm_unknown = 3, /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams. ZSTD_cpm_unknown = 3 /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
* We don't know what these parameters are for. We default to the legacy * We don't know what these parameters are for. We default to the legacy
* behavior of taking both the source size and the dict size into account * behavior of taking both the source size and the dict size into account
* when selecting and adjusting parameters. * when selecting and adjusting parameters.
@ -500,9 +554,11 @@ MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
/* ZSTD_noCompressBlock() : /* ZSTD_noCompressBlock() :
* Writes uncompressed block to dst buffer from given src. * Writes uncompressed block to dst buffer from given src.
* Returns the size of the block */ * Returns the size of the block */
MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) MEM_STATIC size_t
ZSTD_noCompressBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
{ {
U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
DEBUGLOG(5, "ZSTD_noCompressBlock (srcSize=%zu, dstCapacity=%zu)", srcSize, dstCapacity);
RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
dstSize_tooSmall, "dst buf too small for uncompressed block"); dstSize_tooSmall, "dst buf too small for uncompressed block");
MEM_writeLE24(dst, cBlockHeader24); MEM_writeLE24(dst, cBlockHeader24);
@ -510,7 +566,8 @@ MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const voi
return ZSTD_blockHeaderSize + srcSize; return ZSTD_blockHeaderSize + srcSize;
} }
MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) MEM_STATIC size_t
ZSTD_rleCompressBlock(void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
{ {
BYTE* const op = (BYTE*)dst; BYTE* const op = (BYTE*)dst;
U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3); U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
@ -529,7 +586,7 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
{ {
U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat));
return (srcSize >> minlog) + 2; return (srcSize >> minlog) + 2;
} }
@ -565,29 +622,27 @@ ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE con
while (ip < iend) *op++ = *ip++; while (ip < iend) *op++ = *ip++;
} }
#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
#define STORE_REPCODE_1 STORE_REPCODE(1) #define REPCODE1_TO_OFFBASE REPCODE_TO_OFFBASE(1)
#define STORE_REPCODE_2 STORE_REPCODE(2) #define REPCODE2_TO_OFFBASE REPCODE_TO_OFFBASE(2)
#define STORE_REPCODE_3 STORE_REPCODE(3) #define REPCODE3_TO_OFFBASE REPCODE_TO_OFFBASE(3)
#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1) #define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r)) /* accepts IDs 1,2,3 */
#define STORE_OFFSET(o) (assert((o)>0), o + ZSTD_REP_MOVE) #define OFFSET_TO_OFFBASE(o) (assert((o)>0), o + ZSTD_REP_NUM)
#define STORED_IS_OFFSET(o) ((o) > ZSTD_REP_MOVE) #define OFFBASE_IS_OFFSET(o) ((o) > ZSTD_REP_NUM)
#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE) #define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM)
#define STORED_OFFSET(o) (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE) #define OFFBASE_TO_OFFSET(o) (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM)
#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1) /* returns ID 1,2,3 */ #define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o)) /* returns ID 1,2,3 */
#define STORED_TO_OFFBASE(o) ((o)+1)
#define OFFBASE_TO_STORED(o) ((o)-1)
/*! ZSTD_storeSeq() : /*! ZSTD_storeSeq() :
* Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t. * Store a sequence (litlen, litPtr, offBase and matchLength) into seqStore_t.
* @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET(). * @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE().
* @matchLength : must be >= MINMATCH * @matchLength : must be >= MINMATCH
* Allowed to overread literals up to litLimit. * Allowed to over-read literals up to litLimit.
*/ */
HINT_INLINE UNUSED_ATTR void HINT_INLINE UNUSED_ATTR void
ZSTD_storeSeq(seqStore_t* seqStorePtr, ZSTD_storeSeq(seqStore_t* seqStorePtr,
size_t litLength, const BYTE* literals, const BYTE* litLimit, size_t litLength, const BYTE* literals, const BYTE* litLimit,
U32 offBase_minus1, U32 offBase,
size_t matchLength) size_t matchLength)
{ {
BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
@ -596,8 +651,8 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
static const BYTE* g_start = NULL; static const BYTE* g_start = NULL;
if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
{ U32 const pos = (U32)((const BYTE*)literals - g_start); { U32 const pos = (U32)((const BYTE*)literals - g_start);
DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offBase%7u",
pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1); pos, (U32)litLength, (U32)matchLength, (U32)offBase);
} }
#endif #endif
assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
@ -607,9 +662,9 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
assert(literals + litLength <= litLimit); assert(literals + litLength <= litLimit);
if (litEnd <= litLimit_w) { if (litEnd <= litLimit_w) {
/* Common case we can use wildcopy. /* Common case we can use wildcopy.
* First copy 16 bytes, because literals are likely short. * First copy 16 bytes, because literals are likely short.
*/ */
assert(WILDCOPY_OVERLENGTH >= 16); ZSTD_STATIC_ASSERT(WILDCOPY_OVERLENGTH >= 16);
ZSTD_copy16(seqStorePtr->lit, literals); ZSTD_copy16(seqStorePtr->lit, literals);
if (litLength > 16) { if (litLength > 16) {
ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
@ -628,7 +683,7 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
seqStorePtr->sequences[0].litLength = (U16)litLength; seqStorePtr->sequences[0].litLength = (U16)litLength;
/* match offset */ /* match offset */
seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1); seqStorePtr->sequences[0].offBase = offBase;
/* match Length */ /* match Length */
assert(matchLength >= MINMATCH); assert(matchLength >= MINMATCH);
@ -646,17 +701,17 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
/* ZSTD_updateRep() : /* ZSTD_updateRep() :
* updates in-place @rep (array of repeat offsets) * updates in-place @rep (array of repeat offsets)
* @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq() * @offBase : sum-type, using numeric representation of ZSTD_storeSeq()
*/ */
MEM_STATIC void MEM_STATIC void
ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
{ {
if (STORED_IS_OFFSET(offBase_minus1)) { /* full offset */ if (OFFBASE_IS_OFFSET(offBase)) { /* full offset */
rep[2] = rep[1]; rep[2] = rep[1];
rep[1] = rep[0]; rep[1] = rep[0];
rep[0] = STORED_OFFSET(offBase_minus1); rep[0] = OFFBASE_TO_OFFSET(offBase);
} else { /* repcode */ } else { /* repcode */
U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0; U32 const repCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0;
if (repCode > 0) { /* note : if repCode==0, no change */ if (repCode > 0) { /* note : if repCode==0, no change */
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
rep[2] = (repCode >= 2) ? rep[1] : rep[2]; rep[2] = (repCode >= 2) ? rep[1] : rep[2];
@ -673,11 +728,11 @@ typedef struct repcodes_s {
} repcodes_t; } repcodes_t;
MEM_STATIC repcodes_t MEM_STATIC repcodes_t
ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
{ {
repcodes_t newReps; repcodes_t newReps;
ZSTD_memcpy(&newReps, rep, sizeof(newReps)); ZSTD_memcpy(&newReps, rep, sizeof(newReps));
ZSTD_updateRep(newReps.rep, offBase_minus1, ll0); ZSTD_updateRep(newReps.rep, offBase, ll0);
return newReps; return newReps;
} }
@ -685,59 +740,6 @@ ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0
/*-************************************* /*-*************************************
* Match length counter * Match length counter
***************************************/ ***************************************/
/* ZSTD_NbCommonBytes() :
 * Counts the whole zero bytes at one end of @val :
 * - little-endian targets : trailing (least-significant) zero bytes, i.e. ctz(val) / 8
 * - big-endian targets    : leading (most-significant) zero bytes, i.e. clz(val) / 8
 * Each of the four (endianness x word size) branches has a GCC builtin fast path,
 * selected on __GNUC__, and a portable fallback.
 * NOTE(review): GCC documents __builtin_ctz/__builtin_clz as undefined for 0, so callers
 * are presumably expected to pass a non-zero @val — confirm at the call sites.
 * @return : number of zero bytes, as described above */
static unsigned ZSTD_NbCommonBytes (size_t val)
{
if (MEM_isLittleEndian()) {
if (MEM_64bits()) {
# if (__GNUC__ >= 4)
/* trailing zero bits, divided by 8 to get whole bytes */
return (__builtin_ctzll((U64)val) >> 3);
# else
/* Portable path : (val & -val) keeps only the lowest set bit; multiplying by the
 * constant and keeping the top 6 bits (>> 58) yields an index into the table below,
 * whose entries are byte positions. */
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
0, 3, 1, 3, 1, 4, 2, 7,
0, 2, 3, 6, 1, 5, 3, 5,
1, 3, 4, 4, 2, 5, 6, 7,
7, 0, 1, 2, 3, 3, 4, 6,
2, 6, 5, 5, 3, 4, 5, 6,
7, 1, 2, 4, 6, 4, 4, 5,
7, 2, 6, 5, 7, 6, 7, 7 };
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
# endif
} else { /* 32 bits */
# if (__GNUC__ >= 3)
return (__builtin_ctz((U32)val) >> 3);
# else
/* Same lowest-set-bit technique as the 64-bit path, with a 32-entry table
 * indexed by the top 5 bits (>> 27) of the product. */
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
3, 2, 2, 1, 3, 2, 0, 1,
3, 3, 1, 2, 2, 2, 2, 0,
3, 1, 2, 0, 1, 0, 1, 1 };
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
# endif
}
} else { /* Big Endian CPU */
if (MEM_64bits()) {
# if (__GNUC__ >= 4)
/* leading zero bits, divided by 8 to get whole bytes */
return (__builtin_clzll(val) >> 3);
# else
/* Portable path : successive halving from the most-significant end.
 * r accumulates the number of leading zero bytes found so far. */
unsigned r;
const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
/* upper half all zero => 4 zero bytes, keep examining the lower half;
 * otherwise bring the upper half down and examine it */
if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
/* top 16 bits of the remaining 32 all zero => 2 more zero bytes;
 * either way val is reduced to the single byte that decides the last step */
if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
/* one more zero byte if that deciding byte is itself zero */
r += (!val);
return r;
# endif
} else { /* 32 bits */
# if (__GNUC__ >= 3)
return (__builtin_clz((U32)val) >> 3);
# else
/* Same halving scheme as above, starting directly from a 32-bit value. */
unsigned r;
if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
r += (!val);
return r;
# endif
} }
}
MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
{ {
const BYTE* const pStart = pIn; const BYTE* const pStart = pIn;
@ -783,32 +785,43 @@ ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
* Hashes * Hashes
***************************************/ ***************************************/
static const U32 prime3bytes = 506832829U; static const U32 prime3bytes = 506832829U;
static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; } static U32 ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s) >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */
MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); }
static const U32 prime4bytes = 2654435761U; static const U32 prime4bytes = 2654435761U;
static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } static U32 ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); }
static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); }
static const U64 prime5bytes = 889523592379ULL; static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); }
static const U64 prime6bytes = 227718039650203ULL; static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); }
static const U64 prime7bytes = 58295818150454627ULL; static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); }
static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes) ^ s) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); }
static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); }
MEM_STATIC FORCE_INLINE_ATTR MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
{ {
/* Although some of these hashes do support hBits up to 64, some do not.
* To be on the safe side, always avoid hBits > 32. */
assert(hBits <= 32);
switch(mls) switch(mls)
{ {
default: default:
@ -820,6 +833,24 @@ size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
} }
} }
/* ZSTD_hashPtrSalted() :
 * Hashes the bytes at @p into @hBits bits, mixing in @hashSalt.
 * @mls selects the hash width in bytes (5, 6, 7 or 8);
 * 4 and every other value use the 4-byte hash, which only
 * receives the low 32 bits of the salt.
 * @return : the salted hash value */
MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt)
{
    /* Some of the underlying hashes accept hBits up to 64, others do not.
     * Keep the common entry point on the safe side : never allow hBits > 32. */
    assert(hBits <= 32);

    switch (mls) {
    case 8:
        return ZSTD_hash8PtrS(p, hBits, hashSalt);
    case 7:
        return ZSTD_hash7PtrS(p, hBits, hashSalt);
    case 6:
        return ZSTD_hash6PtrS(p, hBits, hashSalt);
    case 5:
        return ZSTD_hash5PtrS(p, hBits, hashSalt);
    case 4:
    default:
        /* the 4-byte hash takes a 32-bit salt */
        return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt);
    }
}
/* ZSTD_ipow() : /* ZSTD_ipow() :
* Return base^exponent. * Return base^exponent.
*/ */
@ -1167,10 +1198,15 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
assert(blockEndIdx >= loadedDictEnd); assert(blockEndIdx >= loadedDictEnd);
if (blockEndIdx > loadedDictEnd + maxDist) { if (blockEndIdx > loadedDictEnd + maxDist || loadedDictEnd != window->dictLimit) {
/* On reaching window size, dictionaries are invalidated. /* On reaching window size, dictionaries are invalidated.
* For simplification, if window size is reached anywhere within next block, * For simplification, if window size is reached anywhere within next block,
* the dictionary is invalidated for the full block. * the dictionary is invalidated for the full block.
*
* We also have to invalidate the dictionary if ZSTD_window_update() has detected
* non-contiguous segments, which means that loadedDictEnd != window->dictLimit.
* loadedDictEnd may be 0, if forceWindow is true, but in that case we never use
* dictMatchState, so setting it to NULL is not a problem.
*/ */
DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
*loadedDictEndPtr = 0; *loadedDictEndPtr = 0;
@ -1302,6 +1338,42 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
#endif #endif
/* Short Cache */
/* Normally, zstd matchfinders follow this flow:
* 1. Compute hash at ip
* 2. Load index from hashTable[hash]
* 3. Check if *ip == *(base + index)
* In dictionary compression, loading *(base + index) is often an L2 or even L3 miss.
*
* Short cache is an optimization which allows us to avoid step 3 most of the time
* when the data doesn't actually match. With short cache, the flow becomes:
* 1. Compute (hash, currentTag) at ip. currentTag is an 8-bit independent hash at ip.
* 2. Load (index, matchTag) from hashTable[hash]. See ZSTD_writeTaggedIndex to understand how this works.
* 3. Only if currentTag == matchTag, check *ip == *(base + index). Otherwise, continue.
*
* Currently, short cache is only implemented in CDict hashtables. Thus, its use is limited to
* dictMatchState matchfinders.
*/
#define ZSTD_SHORT_CACHE_TAG_BITS 8
#define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1)
/* ZSTD_writeTaggedIndex() :
 * Helper function for ZSTD_fillHashTable and ZSTD_fillDoubleHashTable.
 * @hashAndTag packs a table slot (upper bits) with a tag
 * (lowest ZSTD_SHORT_CACHE_TAG_BITS bits).
 * The slot receives @index shifted left by ZSTD_SHORT_CACHE_TAG_BITS, with the tag in the freed low bits.
 * @index must fit in (32 - ZSTD_SHORT_CACHE_TAG_BITS) bits; this is asserted. */
MEM_STATIC void ZSTD_writeTaggedIndex(U32* const hashTable, size_t hashAndTag, U32 index)
{
    size_t const slot = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
    U32 const lowTag = (U32)(hashAndTag & ZSTD_SHORT_CACHE_TAG_MASK);

    /* the top ZSTD_SHORT_CACHE_TAG_BITS bits of index must be free to make room for the tag */
    assert((index >> (32 - ZSTD_SHORT_CACHE_TAG_BITS)) == 0);

    hashTable[slot] = lowTag | (index << ZSTD_SHORT_CACHE_TAG_BITS);
}
/* Helper function for short cache matchfinders.
* Unpacks tag1 and tag2 from lower bits of packedTag1 and packedTag2, then checks if the tags match. */
MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) {
U32 const tag1 = packedTag1 & ZSTD_SHORT_CACHE_TAG_MASK;
U32 const tag2 = packedTag2 & ZSTD_SHORT_CACHE_TAG_MASK;
return tag1 == tag2;
}
/* =============================================================== /* ===============================================================
@ -1396,4 +1468,51 @@ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
*/ */
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize); void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
* ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
* Note that the block delimiter must include the last literals of the block.
*/
size_t
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
/* Returns the number of bytes to move the current read position back by.
* Only non-zero if we ended up splitting a sequence.
* Otherwise, it may return a ZSTD error if something went wrong.
*
* This function will attempt to scan through blockSize bytes
* represented by the sequences in @inSeqs,
* storing any (partial) sequences.
*
* Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
* avoid splitting a match, or to avoid splitting a match such that it would produce a match
* smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
*/
size_t
ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
/* ===============================================================
* Deprecated definitions that are still used internally to avoid
* deprecation warnings. These functions are exactly equivalent to
* their public variants, but avoid the deprecation warnings.
* =============================================================== */
size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
#endif /* ZSTD_COMPRESS_H */ #endif /* ZSTD_COMPRESS_H */

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -13,11 +14,36 @@
***************************************/ ***************************************/
#include "zstd_compress_literals.h" #include "zstd_compress_literals.h"
/* **************************************************************
* Debug Traces
****************************************************************/
#if DEBUGLEVEL >= 2
/* showHexa() :
 * Debug helper : sends each of the @srcSize bytes of @src to RAWLOG (level 5)
 * as a space followed by two uppercase hex digits, then terminates the line.
 * @return : srcSize (ZSTD_compressLiterals passes the result as a DEBUGLOG argument). */
static size_t showHexa(const void* src, size_t srcSize)
{
    const BYTE* const bytes = (const BYTE*)src;
    size_t pos = 0;
    while (pos < srcSize) {
        RAWLOG(5, " %02X", bytes[pos]);
        (void)bytes;   /* presumably avoids an unused-variable warning if RAWLOG() discards its arguments */
        pos++;
    }
    RAWLOG(5, " \n");
    return srcSize;
}
#endif
/* **************************************************************
* Literals compression - special cases
****************************************************************/
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{ {
BYTE* const ostart = (BYTE*)dst; BYTE* const ostart = (BYTE*)dst;
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, ""); RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
switch(flSize) switch(flSize)
@ -36,16 +62,30 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
} }
ZSTD_memcpy(ostart + flSize, src, srcSize); ZSTD_memcpy(ostart + flSize, src, srcSize);
DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize)); DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
return srcSize + flSize; return srcSize + flSize;
} }
static int allBytesIdentical(const void* src, size_t srcSize)
{
assert(srcSize >= 1);
assert(src != NULL);
{ const BYTE b = ((const BYTE*)src)[0];
size_t p;
for (p=1; p<srcSize; p++) {
if (((const BYTE*)src)[p] != b) return 0;
}
return 1;
}
}
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{ {
BYTE* const ostart = (BYTE*)dst; BYTE* const ostart = (BYTE*)dst;
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
(void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ assert(dstCapacity >= 4); (void)dstCapacity;
assert(allBytesIdentical(src, srcSize));
switch(flSize) switch(flSize)
{ {
@ -63,28 +103,51 @@ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void*
} }
ostart[flSize] = *(const BYTE*)src; ostart[flSize] = *(const BYTE*)src;
DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1); DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
return flSize+1; return flSize+1;
} }
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, /* ZSTD_minLiteralsToCompress() :
ZSTD_hufCTables_t* nextHuf, * returns minimal amount of literals
ZSTD_strategy strategy, int disableLiteralCompression, * for literal compression to even be attempted.
void* dst, size_t dstCapacity, * Minimum is made tighter as compression strategy increases.
const void* src, size_t srcSize, */
void* entropyWorkspace, size_t entropyWorkspaceSize, static size_t
const int bmi2, ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
unsigned suspectUncompressible) {
assert((int)strategy >= 0);
assert((int)strategy <= 9);
/* btultra2 : min 8 bytes;
* then 2x larger for each step down in compression strategy (btultra: 16, btopt: 32, ...)
* max threshold 64 bytes */
{ int const shift = MIN(9-(int)strategy, 3);
size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift;
DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
return mintc;
}
}
size_t ZSTD_compressLiterals (
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
void* entropyWorkspace, size_t entropyWorkspaceSize,
const ZSTD_hufCTables_t* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy,
int disableLiteralCompression,
int suspectUncompressible,
int bmi2)
{ {
size_t const minGain = ZSTD_minGain(srcSize, strategy);
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
BYTE* const ostart = (BYTE*)dst; BYTE* const ostart = (BYTE*)dst;
U32 singleStream = srcSize < 256; U32 singleStream = srcSize < 256;
symbolEncodingType_e hType = set_compressed; symbolEncodingType_e hType = set_compressed;
size_t cLitSize; size_t cLitSize;
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)", DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",
disableLiteralCompression, (U32)srcSize); disableLiteralCompression, (U32)srcSize, dstCapacity);
DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));
/* Prepare nextEntropy assuming reusing the existing table */ /* Prepare nextEntropy assuming reusing the existing table */
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
@ -92,40 +155,51 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
if (disableLiteralCompression) if (disableLiteralCompression)
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
/* small ? don't even attempt compression (speed opt) */ /* if too small, don't even attempt compression (speed opt) */
# define COMPRESS_LITERALS_SIZE_MIN 63 if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
{ HUF_repeat repeat = prevHuf->repeatMode; { HUF_repeat repeat = prevHuf->repeatMode;
int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; int const flags = 0
| (bmi2 ? HUF_flags_bmi2 : 0)
| (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0)
| (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0)
| (suspectUncompressible ? HUF_flags_suspectUncompressible : 0);
typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);
huf_compress_f huf_compress;
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
cLitSize = singleStream ? huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
HUF_compress1X_repeat( cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,
ostart+lhSize, dstCapacity-lhSize, src, srcSize, src, srcSize,
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, HUF_SYMBOLVALUE_MAX, LitHufLog,
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible) : entropyWorkspace, entropyWorkspaceSize,
HUF_compress4X_repeat( (HUF_CElt*)nextHuf->CTable,
ostart+lhSize, dstCapacity-lhSize, src, srcSize, &repeat, flags);
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible);
if (repeat != HUF_repeat_none) { if (repeat != HUF_repeat_none) {
/* reused the existing table */ /* reused the existing table */
DEBUGLOG(5, "Reusing previous huffman table"); DEBUGLOG(5, "reusing statistics from previous huffman block");
hType = set_repeat; hType = set_repeat;
} }
} }
if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) { { size_t const minGain = ZSTD_minGain(srcSize, strategy);
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
} return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
} }
if (cLitSize==1) { if (cLitSize==1) {
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); /* A return value of 1 signals that the alphabet consists of a single symbol.
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); * However, in some rare circumstances, it could be the compressed size (a single byte).
} * For that outcome to have a chance to happen, it's necessary that `srcSize < 8`.
* (it's also necessary to not generate statistics).
* Therefore, in such a case, actively check that all bytes are identical. */
if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
} }
if (hType == set_compressed) { if (hType == set_compressed) {
/* using a newly constructed table */ /* using a newly constructed table */
@ -136,16 +210,19 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
switch(lhSize) switch(lhSize)
{ {
case 3: /* 2 - 2 - 10 - 10 */ case 3: /* 2 - 2 - 10 - 10 */
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
{ U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
MEM_writeLE24(ostart, lhc); MEM_writeLE24(ostart, lhc);
break; break;
} }
case 4: /* 2 - 2 - 14 - 14 */ case 4: /* 2 - 2 - 14 - 14 */
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
{ U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
MEM_writeLE32(ostart, lhc); MEM_writeLE32(ostart, lhc);
break; break;
} }
case 5: /* 2 - 2 - 18 - 18 */ case 5: /* 2 - 2 - 18 - 18 */
assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
{ U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
MEM_writeLE32(ostart, lhc); MEM_writeLE32(ostart, lhc);
ostart[4] = (BYTE)(cLitSize >> 10); ostart[4] = (BYTE)(cLitSize >> 10);

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -16,16 +17,24 @@
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize); size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
/* ZSTD_compressRleLiteralsBlock() :
* Conditions :
* - All bytes in @src are identical
* - dstCapacity >= 4 */
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */ /* ZSTD_compressLiterals():
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, * @entropyWorkspace: must be aligned on 4-bytes boundaries
ZSTD_hufCTables_t* nextHuf, * @entropyWorkspaceSize : must be >= HUF_WORKSPACE_SIZE
ZSTD_strategy strategy, int disableLiteralCompression, * @suspectUncompressible: sampling checks, to potentially skip huffman coding
void* dst, size_t dstCapacity, */
size_t ZSTD_compressLiterals (void* dst, size_t dstCapacity,
const void* src, size_t srcSize, const void* src, size_t srcSize,
void* entropyWorkspace, size_t entropyWorkspaceSize, void* entropyWorkspace, size_t entropyWorkspaceSize,
const int bmi2, const ZSTD_hufCTables_t* prevHuf,
unsigned suspectUncompressible); ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
int suspectUncompressible,
int bmi2);
#endif /* ZSTD_COMPRESS_LITERALS_H */ #endif /* ZSTD_COMPRESS_LITERALS_H */

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -58,7 +59,7 @@ static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
{ {
/* Heuristic: This should cover most blocks <= 16K and /* Heuristic: This should cover most blocks <= 16K and
* start to fade out after 16K to about 32K depending on * start to fade out after 16K to about 32K depending on
* comprssibility. * compressibility.
*/ */
return nbSeq >= 2048; return nbSeq >= 2048;
} }
@ -166,7 +167,7 @@ ZSTD_selectEncodingType(
if (mostFrequent == nbSeq) { if (mostFrequent == nbSeq) {
*repeatMode = FSE_repeat_none; *repeatMode = FSE_repeat_none;
if (isDefaultAllowed && nbSeq <= 2) { if (isDefaultAllowed && nbSeq <= 2) {
/* Prefer set_basic over set_rle when there are 2 or less symbols, /* Prefer set_basic over set_rle when there are 2 or fewer symbols,
* since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
* If basic encoding isn't possible, always choose RLE. * If basic encoding isn't possible, always choose RLE.
*/ */

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -36,13 +37,14 @@
* If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block * If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block
* and the following sub-blocks' literals sections will be Treeless_Literals_Block. * and the following sub-blocks' literals sections will be Treeless_Literals_Block.
* @return : compressed size of literals section of a sub-block * @return : compressed size of literals section of a sub-block
* Or 0 if it unable to compress. * Or 0 if unable to compress.
* Or error code */ * Or error code */
static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, static size_t
const ZSTD_hufCTablesMetadata_t* hufMetadata, ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
const BYTE* literals, size_t litSize, const ZSTD_hufCTablesMetadata_t* hufMetadata,
void* dst, size_t dstSize, const BYTE* literals, size_t litSize,
const int bmi2, int writeEntropy, int* entropyWritten) void* dst, size_t dstSize,
const int bmi2, int writeEntropy, int* entropyWritten)
{ {
size_t const header = writeEntropy ? 200 : 0; size_t const header = writeEntropy ? 200 : 0;
size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header)); size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
@ -53,8 +55,6 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat; symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
size_t cLitSize = 0; size_t cLitSize = 0;
(void)bmi2; /* TODO bmi2... */
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy); DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
*entropyWritten = 0; *entropyWritten = 0;
@ -76,9 +76,9 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize); DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
} }
/* TODO bmi2 */ { int const flags = bmi2 ? HUF_flags_bmi2 : 0;
{ const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable) const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable, flags)
: HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable); : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable, flags);
op += cSize; op += cSize;
cLitSize += cSize; cLitSize += cSize;
if (cSize == 0 || ERR_isError(cSize)) { if (cSize == 0 || ERR_isError(cSize)) {
@ -126,7 +126,11 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
return op-ostart; return op-ostart;
} }
static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) { static size_t
ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
const seqDef* sequences, size_t nbSeq,
size_t litSize, int lastSequence)
{
const seqDef* const sstart = sequences; const seqDef* const sstart = sequences;
const seqDef* const send = sequences + nbSeq; const seqDef* const send = sequences + nbSeq;
const seqDef* sp = sstart; const seqDef* sp = sstart;
@ -156,13 +160,14 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef*
* @return : compressed size of sequences section of a sub-block * @return : compressed size of sequences section of a sub-block
* Or 0 if it is unable to compress * Or 0 if it is unable to compress
* Or error code. */ * Or error code. */
static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables, static size_t
const ZSTD_fseCTablesMetadata_t* fseMetadata, ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
const seqDef* sequences, size_t nbSeq, const ZSTD_fseCTablesMetadata_t* fseMetadata,
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, const seqDef* sequences, size_t nbSeq,
const ZSTD_CCtx_params* cctxParams, const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
void* dst, size_t dstCapacity, const ZSTD_CCtx_params* cctxParams,
const int bmi2, int writeEntropy, int* entropyWritten) void* dst, size_t dstCapacity,
const int bmi2, int writeEntropy, int* entropyWritten)
{ {
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
BYTE* const ostart = (BYTE*)dst; BYTE* const ostart = (BYTE*)dst;
@ -539,7 +544,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
repcodes_t rep; repcodes_t rep;
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep)); ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
for (seq = sstart; seq < sp; ++seq) { for (seq = sstart; seq < sp; ++seq) {
ZSTD_updateRep(rep.rep, seq->offBase - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
} }
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep)); ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
} }

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -14,7 +15,9 @@
/*-************************************* /*-*************************************
* Dependencies * Dependencies
***************************************/ ***************************************/
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
#include "../common/zstd_internal.h" #include "../common/zstd_internal.h"
#include "../common/portability_macros.h"
/*-************************************* /*-*************************************
@ -41,8 +44,9 @@
***************************************/ ***************************************/
typedef enum { typedef enum {
ZSTD_cwksp_alloc_objects, ZSTD_cwksp_alloc_objects,
ZSTD_cwksp_alloc_buffers, ZSTD_cwksp_alloc_aligned_init_once,
ZSTD_cwksp_alloc_aligned ZSTD_cwksp_alloc_aligned,
ZSTD_cwksp_alloc_buffers
} ZSTD_cwksp_alloc_phase_e; } ZSTD_cwksp_alloc_phase_e;
/* /*
@ -95,8 +99,8 @@ typedef enum {
* *
* Workspace Layout: * Workspace Layout:
* *
* [ ... workspace ... ] * [ ... workspace ... ]
* [objects][tables ... ->] free space [<- ... aligned][<- ... buffers] * [objects][tables ->] free space [<- buffers][<- aligned][<- init once]
* *
* The various objects that live in the workspace are divided into the * The various objects that live in the workspace are divided into the
* following categories, and are allocated separately: * following categories, and are allocated separately:
@ -120,9 +124,18 @@ typedef enum {
* uint32_t arrays, all of whose values are between 0 and (nextSrc - base). * uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
* Their sizes depend on the cparams. These tables are 64-byte aligned. * Their sizes depend on the cparams. These tables are 64-byte aligned.
* *
* - Aligned: these buffers are used for various purposes that require 4 byte * - Init once: these buffers require to be initialized at least once before
* alignment, but don't require any initialization before they're used. These * use. They should be used when we want to skip memory initialization
* buffers are each aligned to 64 bytes. * while not triggering memory checkers (like Valgrind) when reading from
* this memory without writing to it first.
* These buffers should be used carefully as they might contain data
* from previous compressions.
* Buffers are aligned to 64 bytes.
*
* - Aligned: these buffers don't require any initialization before they're
* used. The user of the buffer should make sure they write into a buffer
* location before reading from it.
* Buffers are aligned to 64 bytes.
* *
* - Buffers: these buffers are used for various purposes that don't require * - Buffers: these buffers are used for various purposes that don't require
* any alignment or initialization before they're used. This means they can * any alignment or initialization before they're used. This means they can
@ -134,8 +147,9 @@ typedef enum {
* correctly packed into the workspace buffer. That order is: * correctly packed into the workspace buffer. That order is:
* *
* 1. Objects * 1. Objects
* 2. Buffers * 2. Init once / Tables
* 3. Aligned/Tables * 3. Aligned / Tables
* 4. Buffers / Tables
* *
* Attempts to reserve objects of different types out of order will fail. * Attempts to reserve objects of different types out of order will fail.
*/ */
@ -147,6 +161,7 @@ typedef struct {
void* tableEnd; void* tableEnd;
void* tableValidEnd; void* tableValidEnd;
void* allocStart; void* allocStart;
void* initOnceStart;
BYTE allocFailed; BYTE allocFailed;
int workspaceOversizedDuration; int workspaceOversizedDuration;
@ -159,6 +174,7 @@ typedef struct {
***************************************/ ***************************************/
MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws); MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws);
MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) { MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
(void)ws; (void)ws;
@ -168,6 +184,8 @@ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
assert(ws->tableEnd <= ws->allocStart); assert(ws->tableEnd <= ws->allocStart);
assert(ws->tableValidEnd <= ws->allocStart); assert(ws->tableValidEnd <= ws->allocStart);
assert(ws->allocStart <= ws->workspaceEnd); assert(ws->allocStart <= ws->workspaceEnd);
assert(ws->initOnceStart <= ZSTD_cwksp_initialAllocStart(ws));
assert(ws->workspace <= ws->initOnceStart);
} }
/* /*
@ -210,14 +228,10 @@ MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
* for internal purposes (currently only alignment). * for internal purposes (currently only alignment).
*/ */
MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) { MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
/* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes /* For alignment, the wksp will always allocate an additional 2*ZSTD_CWKSP_ALIGNMENT_BYTES
* to align the beginning of tables section, as well as another n_2=[0, 63] bytes * bytes to align the beginning of tables section and end of buffers;
* to align the beginning of the aligned section.
*
* n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and
* aligneds being sized in multiples of 64 bytes.
*/ */
size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES; size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES * 2;
return slackSpace; return slackSpace;
} }
@ -230,10 +244,18 @@ MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignByt
size_t const alignBytesMask = alignBytes - 1; size_t const alignBytesMask = alignBytes - 1;
size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask; size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
assert((alignBytes & alignBytesMask) == 0); assert((alignBytes & alignBytesMask) == 0);
assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES); assert(bytes < alignBytes);
return bytes; return bytes;
} }
/*
 * Computes the initial value of allocStart, i.e. the position from which
 * allocations growing down from the end of the workspace begin.
 * It is workspaceEnd with the low bits selected by (ZSTD_CWKSP_ALIGNMENT_BYTES-1)
 * cleared, so the result never exceeds workspaceEnd.
 * ZSTD_cwksp_internal_advance_phase() also uses it to seed initOnceStart.
 */
MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws) {
    size_t const endAddr = (size_t)ws->workspaceEnd;
    size_t const alignedEnd = endAddr & ~(ZSTD_CWKSP_ALIGNMENT_BYTES-1);
    return (void*)alignedEnd;
}
/* /*
* Internal function. Do not use directly. * Internal function. Do not use directly.
* Reserves the given number of bytes within the aligned/buffer segment of the wksp, * Reserves the given number of bytes within the aligned/buffer segment of the wksp,
@ -274,27 +296,16 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
{ {
assert(phase >= ws->phase); assert(phase >= ws->phase);
if (phase > ws->phase) { if (phase > ws->phase) {
/* Going from allocating objects to allocating buffers */ /* Going from allocating objects to allocating initOnce / tables */
if (ws->phase < ZSTD_cwksp_alloc_buffers && if (ws->phase < ZSTD_cwksp_alloc_aligned_init_once &&
phase >= ZSTD_cwksp_alloc_buffers) { phase >= ZSTD_cwksp_alloc_aligned_init_once) {
ws->tableValidEnd = ws->objectEnd; ws->tableValidEnd = ws->objectEnd;
} ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
/* Going from allocating buffers to allocating aligneds/tables */
if (ws->phase < ZSTD_cwksp_alloc_aligned &&
phase >= ZSTD_cwksp_alloc_aligned) {
{ /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */
size_t const bytesToAlign =
ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES);
DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign);
ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */
RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign),
memory_allocation, "aligned phase - alignment initial allocation failed!");
}
{ /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */ { /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
void* const alloc = ws->objectEnd; void *const alloc = ws->objectEnd;
size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES); size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
void* const objectEnd = (BYTE*)alloc + bytesToAlign; void *const objectEnd = (BYTE *) alloc + bytesToAlign;
DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign); DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation, RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation,
"table phase - alignment initial allocation failed!"); "table phase - alignment initial allocation failed!");
@ -302,7 +313,9 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
ws->tableEnd = objectEnd; /* table area starts being empty */ ws->tableEnd = objectEnd; /* table area starts being empty */
if (ws->tableValidEnd < ws->tableEnd) { if (ws->tableValidEnd < ws->tableEnd) {
ws->tableValidEnd = ws->tableEnd; ws->tableValidEnd = ws->tableEnd;
} } } }
}
}
ws->phase = phase; ws->phase = phase;
ZSTD_cwksp_assert_internal_consistency(ws); ZSTD_cwksp_assert_internal_consistency(ws);
} }
@ -314,7 +327,7 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
*/ */
MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr)
{ {
/* Ownership test: the result is non-zero only for a non-NULL ptr that is not below ws->workspace.
 * Two variants of the upper-bound check appear side by side on the next line: the first accepts
 * ptr == ws->workspaceEnd (<=), the second rejects it (<), i.e. the end pointer itself is no
 * longer considered to be inside the workspace.
 * NOTE(review): presumably the first variant is the pre-import text and the second the v1.5.5 text - confirm. */
return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); return (ptr != NULL) && (ws->workspace <= ptr) && (ptr < ws->workspaceEnd);
} }
/* /*
@ -343,6 +356,33 @@ MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes)
return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers); return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
} }
/*
 * Hands out a region from the workspace's "init once" area. The requested size is rounded up
 * to a multiple of ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes) and the returned address is expected
 * to be aligned on that same boundary.
 *
 * "Init once" means the memory has been initialized at some point in the past, not
 * necessarily during this call: it may still hold data left over from earlier operations.
 * It is intended for algorithms that may read memory they have not written yet. Such
 * algorithms must remain safe under these conditions and must not leak the stale data,
 * neither directly nor through side channels.
 *
 * Returns the pointer produced by ZSTD_cwksp_reserve_internal(); a NULL result is passed
 * through untouched.
 */
MEM_STATIC void* ZSTD_cwksp_reserve_aligned_init_once(ZSTD_cwksp* ws, size_t bytes)
{
    size_t const reservedSize = ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES);
    void* const region = ZSTD_cwksp_reserve_internal(ws, reservedSize, ZSTD_cwksp_alloc_aligned_init_once);
    assert(((size_t)region & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);

    /* Nothing to zero when the reservation failed or when the region starts at or above
     * ws->initOnceStart (that range has already been initialized once). */
    if (!region || !(region < ws->initOnceStart)) {
        return region;
    }

    /* The region starts below the initialized range: zero the part that has never been set.
     * The memory following the current allocation is assumed to be one of:
     *   1. not usable as initOnce memory (end of workspace);
     *   2. another initOnce buffer allocated earlier (and therefore already memset);
     *   3. an ASAN redzone, which must not be written to.
     * Hence it is fine not to explicitly zero every byte up to ws->initOnceStart.
     * This also assumes that MSAN and ASAN cannot be active at the same time. */
    {   size_t const gapToInitialized = (size_t)((U8*)ws->initOnceStart - (U8*)region);
        ZSTD_memset(region, 0, MIN(gapToInitialized, reservedSize));
    }
    ws->initOnceStart = region;
    return region;
}
/* /*
* Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes). * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
*/ */
@ -361,13 +401,17 @@ MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
*/ */
MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
{ {
const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned; const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned_init_once;
void* alloc; void* alloc;
void* end; void* end;
void* top; void* top;
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) { /* We can only start allocating tables after we are done reserving space for objects at the
return NULL; * start of the workspace */
if(ws->phase < phase) {
if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
return NULL;
}
} }
alloc = ws->tableEnd; alloc = ws->tableEnd;
end = (BYTE *)alloc + bytes; end = (BYTE *)alloc + bytes;
@ -451,7 +495,7 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
assert(ws->tableValidEnd >= ws->objectEnd); assert(ws->tableValidEnd >= ws->objectEnd);
assert(ws->tableValidEnd <= ws->allocStart); assert(ws->tableValidEnd <= ws->allocStart);
if (ws->tableValidEnd < ws->tableEnd) { if (ws->tableValidEnd < ws->tableEnd) {
ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); ZSTD_memset(ws->tableValidEnd, 0, (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd));
} }
ZSTD_cwksp_mark_tables_clean(ws); ZSTD_cwksp_mark_tables_clean(ws);
} }
@ -478,10 +522,10 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
ws->tableEnd = ws->objectEnd; ws->tableEnd = ws->objectEnd;
ws->allocStart = ws->workspaceEnd; ws->allocStart = ZSTD_cwksp_initialAllocStart(ws);
ws->allocFailed = 0; ws->allocFailed = 0;
if (ws->phase > ZSTD_cwksp_alloc_buffers) { if (ws->phase > ZSTD_cwksp_alloc_aligned_init_once) {
ws->phase = ZSTD_cwksp_alloc_buffers; ws->phase = ZSTD_cwksp_alloc_aligned_init_once;
} }
ZSTD_cwksp_assert_internal_consistency(ws); ZSTD_cwksp_assert_internal_consistency(ws);
} }
@ -498,6 +542,7 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_c
ws->workspaceEnd = (BYTE*)start + size; ws->workspaceEnd = (BYTE*)start + size;
ws->objectEnd = ws->workspace; ws->objectEnd = ws->workspace;
ws->tableValidEnd = ws->objectEnd; ws->tableValidEnd = ws->objectEnd;
ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
ws->phase = ZSTD_cwksp_alloc_objects; ws->phase = ZSTD_cwksp_alloc_objects;
ws->isStatic = isStatic; ws->isStatic = isStatic;
ZSTD_cwksp_clear(ws); ZSTD_cwksp_clear(ws);
@ -550,17 +595,11 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
* Returns if the estimated space needed for a wksp is within an acceptable limit of the * Returns if the estimated space needed for a wksp is within an acceptable limit of the
* actual amount of space used. * actual amount of space used.
*/ */
MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws, MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp *const ws, size_t const estimatedSpace) {
size_t const estimatedSpace, int resizedWorkspace) { /* We have an alignment space between objects and tables between tables and buffers, so we can have up to twice
if (resizedWorkspace) { * the alignment bytes difference between estimation and actual usage */
/* Resized/newly allocated wksp should have exact bounds */ return (estimatedSpace - ZSTD_cwksp_slack_space_required()) <= ZSTD_cwksp_used(ws) &&
return ZSTD_cwksp_used(ws) == estimatedSpace; ZSTD_cwksp_used(ws) <= estimatedSpace;
} else {
/* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes
* than estimatedSpace. See the comments in zstd_cwksp.h for details.
*/
return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63);
}
} }

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -11,8 +12,43 @@
#include "zstd_compress_internal.h" #include "zstd_compress_internal.h"
#include "zstd_double_fast.h" #include "zstd_double_fast.h"
/*
 * CDict variant of the double-hash-table fill.
 * Walks the positions from (ms->window.base + ms->nextToUpdate) up to (end - HASH_READ_SIZE)
 * in steps of fastHashFillStep and records them in two tables: ms->hashTable, hashed over
 * 8 bytes, and ms->chainTable, hashed over cParams->minMatch bytes.
 * Both hashes are computed with ZSTD_SHORT_CACHE_TAG_BITS additional bits and entries are
 * stored through ZSTD_writeTaggedIndex().
 * NOTE(review): ZSTD_writeTaggedIndex() is not visible here; presumably it packs the index
 * together with the low "tag" bits of the hash - confirm against its definition.
 *
 * ms   : match state whose tables are filled
 * end  : one-past-the-end of the data to load
 * dtlm : ZSTD_dtlm_fast inserts only the first position of each step;
 *        any other value also considers the remaining positions of the step.
 */
static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashLarge = ms->hashTable;
/* hash width = table log + tag bits; the table slot is later obtained as (hash >> tag bits) */
U32 const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
U32 const mls = cParams->minMatch;
U32* const hashSmall = ms->chainTable;
U32 const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
const BYTE* const base = ms->window.base;
const BYTE* ip = base + ms->nextToUpdate;
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
const U32 fastHashFillStep = 3;
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, /* Always insert every fastHashFillStep position into the hash tables.
* Insert the other positions into the large hash table if their entry
* is empty.
*/
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
U32 const curr = (U32)(ip - base);
U32 i;
for (i = 0; i < fastHashFillStep; ++i) {
size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
/* the small table only ever receives the first position of each step */
if (i == 0) {
ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
}
/* the large table receives the first position unconditionally, the others only when the slot still reads 0 */
if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
}
/* Only load extra positions for ZSTD_dtlm_full */
if (dtlm == ZSTD_dtlm_fast)
break;
} }
}
static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm) void const* end, ZSTD_dictTableLoadMethod_e dtlm)
{ {
const ZSTD_compressionParameters* const cParams = &ms->cParams; const ZSTD_compressionParameters* const cParams = &ms->cParams;
@ -43,7 +79,19 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
/* Only load extra positions for ZSTD_dtlm_full */ /* Only load extra positions for ZSTD_dtlm_full */
if (dtlm == ZSTD_dtlm_fast) if (dtlm == ZSTD_dtlm_fast)
break; break;
} } } }
}
/*
 * Entry point for filling the double-fast hash tables.
 * Pure dispatcher: when tfp == ZSTD_tfp_forCDict the work is delegated to
 * ZSTD_fillDoubleHashTableForCDict(), for every other tfp value to
 * ZSTD_fillDoubleHashTableForCCtx(). ms, end and dtlm are forwarded unchanged.
 */
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp)
{
if (tfp == ZSTD_tfp_forCDict) {
ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
} else {
ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
}
} }
@ -67,7 +115,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
const BYTE* const iend = istart + srcSize; const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE; const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1]; U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0; U32 offsetSaved1 = 0, offsetSaved2 = 0;
size_t mLength; size_t mLength;
U32 offset; U32 offset;
@ -100,8 +148,8 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
U32 const current = (U32)(ip - base); U32 const current = (U32)(ip - base);
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
U32 const maxRep = current - windowLow; U32 const maxRep = current - windowLow;
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
} }
/* Outer Loop: one iteration per match found and stored */ /* Outer Loop: one iteration per match found and stored */
@ -131,7 +179,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++; ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
goto _match_stored; goto _match_stored;
} }
@ -175,9 +223,13 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
} while (ip1 <= ilimit); } while (ip1 <= ilimit);
_cleanup: _cleanup:
/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
* rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
/* save reps for next block */ /* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved; rep[0] = offset_1 ? offset_1 : offsetSaved1;
rep[1] = offset_2 ? offset_2 : offsetSaved; rep[1] = offset_2 ? offset_2 : offsetSaved2;
/* Return the last literals size */ /* Return the last literals size */
return (size_t)(iend - anchor); return (size_t)(iend - anchor);
@ -217,7 +269,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
hashLong[hl1] = (U32)(ip1 - base); hashLong[hl1] = (U32)(ip1 - base);
} }
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
_match_stored: _match_stored:
/* match found */ /* match found */
@ -243,7 +295,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, rLength); ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
ip += rLength; ip += rLength;
anchor = ip; anchor = ip;
continue; /* faster when present ... (?) */ continue; /* faster when present ... (?) */
@ -275,7 +327,6 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
const BYTE* const iend = istart + srcSize; const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE; const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1]; U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;
const ZSTD_matchState_t* const dms = ms->dictMatchState; const ZSTD_matchState_t* const dms = ms->dictMatchState;
const ZSTD_compressionParameters* const dictCParams = &dms->cParams; const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
@ -286,8 +337,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
const BYTE* const dictStart = dictBase + dictStartIndex; const BYTE* const dictStart = dictBase + dictStartIndex;
const BYTE* const dictEnd = dms->window.nextSrc; const BYTE* const dictEnd = dms->window.nextSrc;
const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase); const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase);
const U32 dictHBitsL = dictCParams->hashLog; const U32 dictHBitsL = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
const U32 dictHBitsS = dictCParams->chainLog; const U32 dictHBitsS = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic"); DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
@ -295,6 +346,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
/* if a dictionary is attached, it must be within window range */ /* if a dictionary is attached, it must be within window range */
assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
if (ms->prefetchCDictTables) {
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
PREFETCH_AREA(dictHashLong, hashTableBytes)
PREFETCH_AREA(dictHashSmall, chainTableBytes)
}
/* init */ /* init */
ip += (dictAndPrefixLength == 0); ip += (dictAndPrefixLength == 0);
@ -309,8 +367,12 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
U32 offset; U32 offset;
size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8); size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls); size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
U32 const curr = (U32)(ip-base); U32 const curr = (U32)(ip-base);
U32 const matchIndexL = hashLong[h2]; U32 const matchIndexL = hashLong[h2];
U32 matchIndexS = hashSmall[h]; U32 matchIndexS = hashSmall[h];
@ -328,7 +390,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
ip++; ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
goto _match_stored; goto _match_stored;
} }
@ -340,9 +402,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
goto _match_found; goto _match_found;
} }
} else { } else if (dictTagsMatchL) {
/* check dictMatchState long match */ /* check dictMatchState long match */
U32 const dictMatchIndexL = dictHashLong[dictHL]; U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
const BYTE* dictMatchL = dictBase + dictMatchIndexL; const BYTE* dictMatchL = dictBase + dictMatchIndexL;
assert(dictMatchL < dictEnd); assert(dictMatchL < dictEnd);
@ -358,9 +420,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
if (MEM_read32(match) == MEM_read32(ip)) { if (MEM_read32(match) == MEM_read32(ip)) {
goto _search_next_long; goto _search_next_long;
} }
} else { } else if (dictTagsMatchS) {
/* check dictMatchState short match */ /* check dictMatchState short match */
U32 const dictMatchIndexS = dictHashSmall[dictHS]; U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
match = dictBase + dictMatchIndexS; match = dictBase + dictMatchIndexS;
matchIndexS = dictMatchIndexS + dictIndexDelta; matchIndexS = dictMatchIndexS + dictIndexDelta;
@ -375,10 +437,11 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
continue; continue;
_search_next_long: _search_next_long:
{ size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
U32 const matchIndexL3 = hashLong[hl3]; U32 const matchIndexL3 = hashLong[hl3];
U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
const BYTE* matchL3 = base + matchIndexL3; const BYTE* matchL3 = base + matchIndexL3;
hashLong[hl3] = curr + 1; hashLong[hl3] = curr + 1;
@ -391,9 +454,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
goto _match_found; goto _match_found;
} }
} else { } else if (dictTagsMatchL3) {
/* check dict long +1 match */ /* check dict long +1 match */
U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
assert(dictMatchL3 < dictEnd); assert(dictMatchL3 < dictEnd);
if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
@ -419,7 +482,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
offset_2 = offset_1; offset_2 = offset_1;
offset_1 = offset; offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
_match_stored: _match_stored:
/* match found */ /* match found */
@ -448,7 +511,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2; ip += repLength2;
@ -461,8 +524,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
} /* while (ip < ilimit) */ } /* while (ip < ilimit) */
/* save reps for next block */ /* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved; rep[0] = offset_1;
rep[1] = offset_2 ? offset_2 : offsetSaved; rep[1] = offset_2;
/* Return the last literals size */ /* Return the last literals size */
return (size_t)(iend - anchor); return (size_t)(iend - anchor);
@ -585,7 +648,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++; ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
} else { } else {
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@ -596,7 +659,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
offset_2 = offset_1; offset_2 = offset_1;
offset_1 = offset; offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@ -621,7 +684,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
} }
offset_2 = offset_1; offset_2 = offset_1;
offset_1 = offset; offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
} else { } else {
ip += ((ip-anchor) >> kSearchStrength) + 1; ip += ((ip-anchor) >> kSearchStrength) + 1;
@ -653,7 +716,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2; ip += repLength2;

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -16,7 +17,8 @@
#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */ #include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm); void const* end, ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp);
size_t ZSTD_compressBlock_doubleFast( size_t ZSTD_compressBlock_doubleFast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -11,8 +12,42 @@
#include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */ #include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
#include "zstd_fast.h" #include "zstd_fast.h"
/*
 * CDict variant of the single hash-table fill.
 * Walks the positions from (ms->window.base + ms->nextToUpdate) towards (end - HASH_READ_SIZE)
 * in steps of fastHashFillStep and records them in ms->hashTable. Hashes are taken over
 * cParams->minMatch bytes with ZSTD_SHORT_CACHE_TAG_BITS additional bits, and entries are
 * stored through ZSTD_writeTaggedIndex().
 * NOTE(review): ZSTD_writeTaggedIndex() is not visible here; presumably it packs the index
 * together with the low "tag" bits of the hash - confirm against its definition.
 *
 * ms   : match state whose hash table is filled
 * end  : one-past-the-end of the data to load
 * dtlm : asserted to be ZSTD_dtlm_full; the ZSTD_dtlm_fast path is still handled below
 *        for builds where the assert is compiled out or later removed.
 */
static void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashTable = ms->hashTable;
/* hash width = table log + tag bits; the table slot is later obtained as (hash >> tag bits) */
U32 const hBits = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
U32 const mls = cParams->minMatch;
const BYTE* const base = ms->window.base;
const BYTE* ip = base + ms->nextToUpdate;
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
const U32 fastHashFillStep = 3;
void ZSTD_fillHashTable(ZSTD_matchState_t* ms, /* Currently, we always use ZSTD_dtlm_full for filling CDict tables.
* Feel free to remove this assert if there's a good reason! */
assert(dtlm == ZSTD_dtlm_full);
/* Always insert every fastHashFillStep position into the hash table.
* Insert the other positions if their hash entry is empty.
*/
/* loop bound is equivalent to ip + 2 <= iend, so every position hashed in one iteration (ip .. ip+2) is at or below iend */
for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
U32 const curr = (U32)(ip - base);
/* first position of the step: always written */
{ size_t const hashAndTag = ZSTD_hashPtr(ip, hBits, mls);
ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr); }
if (dtlm == ZSTD_dtlm_fast) continue;
/* Only load extra positions for ZSTD_dtlm_full */
{ U32 p;
for (p = 1; p < fastHashFillStep; ++p) {
size_t const hashAndTag = ZSTD_hashPtr(ip + p, hBits, mls);
if (hashTable[hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) { /* not yet filled */
ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr + p);
} } } }
}
static void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
const void* const end, const void* const end,
ZSTD_dictTableLoadMethod_e dtlm) ZSTD_dictTableLoadMethod_e dtlm)
{ {
@ -25,6 +60,10 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
const U32 fastHashFillStep = 3; const U32 fastHashFillStep = 3;
/* Currently, we always use ZSTD_dtlm_fast for filling CCtx tables.
* Feel free to remove this assert if there's a good reason! */
assert(dtlm == ZSTD_dtlm_fast);
/* Always insert every fastHashFillStep position into the hash table. /* Always insert every fastHashFillStep position into the hash table.
* Insert the other positions if their hash entry is empty. * Insert the other positions if their hash entry is empty.
*/ */
@ -42,6 +81,18 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
} } } } } } } }
} }
/*
 * Entry point for filling the fast-strategy hash table.
 * Selects the fill routine from the fill purpose: ZSTD_tfp_forCDict goes to
 * ZSTD_fillHashTableForCDict(), every other value to ZSTD_fillHashTableForCCtx().
 * ms, end and dtlm are forwarded unchanged.
 */
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
                        const void* const end,
                        ZSTD_dictTableLoadMethod_e dtlm,
                        ZSTD_tableFillPurpose_e tfp)
{
    if (tfp != ZSTD_tfp_forCDict) {
        /* any purpose other than building a CDict uses the CCtx filler */
        ZSTD_fillHashTableForCCtx(ms, end, dtlm);
        return;
    }
    ZSTD_fillHashTableForCDict(ms, end, dtlm);
}
/* /*
* If you squint hard enough (and ignore repcodes), the search operation at any * If you squint hard enough (and ignore repcodes), the search operation at any
@ -117,7 +168,7 @@ ZSTD_compressBlock_fast_noDict_generic(
U32 rep_offset1 = rep[0]; U32 rep_offset1 = rep[0];
U32 rep_offset2 = rep[1]; U32 rep_offset2 = rep[1];
U32 offsetSaved = 0; U32 offsetSaved1 = 0, offsetSaved2 = 0;
size_t hash0; /* hash for ip0 */ size_t hash0; /* hash for ip0 */
size_t hash1; /* hash for ip1 */ size_t hash1; /* hash for ip1 */
@ -141,8 +192,8 @@ ZSTD_compressBlock_fast_noDict_generic(
{ U32 const curr = (U32)(ip0 - base); { U32 const curr = (U32)(ip0 - base);
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
U32 const maxRep = curr - windowLow; U32 const maxRep = curr - windowLow;
if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0; if (rep_offset2 > maxRep) offsetSaved2 = rep_offset2, rep_offset2 = 0;
if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0; if (rep_offset1 > maxRep) offsetSaved1 = rep_offset1, rep_offset1 = 0;
} }
/* start each op */ /* start each op */
@ -180,8 +231,14 @@ ZSTD_compressBlock_fast_noDict_generic(
mLength = ip0[-1] == match0[-1]; mLength = ip0[-1] == match0[-1];
ip0 -= mLength; ip0 -= mLength;
match0 -= mLength; match0 -= mLength;
offcode = STORE_REPCODE_1; offcode = REPCODE1_TO_OFFBASE;
mLength += 4; mLength += 4;
/* First write next hash table entry; we've already calculated it.
* This write is known to be safe because the ip1 is before the
* repcode (ip2). */
hashTable[hash1] = (U32)(ip1 - base);
goto _match; goto _match;
} }
@ -195,6 +252,12 @@ ZSTD_compressBlock_fast_noDict_generic(
/* check match at ip[0] */ /* check match at ip[0] */
if (MEM_read32(ip0) == mval) { if (MEM_read32(ip0) == mval) {
/* found a match! */ /* found a match! */
/* First write next hash table entry; we've already calculated it.
* This write is known to be safe because the ip1 == ip0 + 1, so
* we know we will resume searching after ip1 */
hashTable[hash1] = (U32)(ip1 - base);
goto _offset; goto _offset;
} }
@ -224,6 +287,21 @@ ZSTD_compressBlock_fast_noDict_generic(
/* check match at ip[0] */ /* check match at ip[0] */
if (MEM_read32(ip0) == mval) { if (MEM_read32(ip0) == mval) {
/* found a match! */ /* found a match! */
/* first write next hash table entry; we've already calculated it */
if (step <= 4) {
/* We need to avoid writing an index into the hash table >= the
* position at which we will pick up our searching after we've
* taken this match.
*
* The minimum possible match has length 4, so the earliest ip0
* can be after we take this match will be the current ip0 + 4.
* ip1 is ip0 + step - 1. If ip1 is >= ip0 + 4, we can't safely
* write this position.
*/
hashTable[hash1] = (U32)(ip1 - base);
}
goto _offset; goto _offset;
} }
@ -254,9 +332,24 @@ ZSTD_compressBlock_fast_noDict_generic(
* However, it seems to be a meaningful performance hit to try to search * However, it seems to be a meaningful performance hit to try to search
* them. So let's not. */ * them. So let's not. */
/* When the repcodes are outside of the prefix, we set them to zero before the loop.
* When the offsets are still zero, we need to restore them after the block to have a correct
* repcode history. If only one offset was invalid, it is easy. The tricky case is when both
* offsets were invalid. We need to figure out which offset to refill with.
* - If both offsets are zero they are in the same order.
* - If both offsets are non-zero, we won't restore the offsets from `offsetSaved[12]`.
* - If only one is zero, we need to decide which offset to restore.
* - If rep_offset1 is non-zero, then rep_offset2 must be offsetSaved1.
* - It is impossible for rep_offset2 to be non-zero.
*
* So if rep_offset1 started invalid (offsetSaved1 != 0) and became valid (rep_offset1 != 0), then
* set rep[0] = rep_offset1 and rep[1] = offsetSaved1.
*/
offsetSaved2 = ((offsetSaved1 != 0) && (rep_offset1 != 0)) ? offsetSaved1 : offsetSaved2;
/* save reps for next block */ /* save reps for next block */
rep[0] = rep_offset1 ? rep_offset1 : offsetSaved; rep[0] = rep_offset1 ? rep_offset1 : offsetSaved1;
rep[1] = rep_offset2 ? rep_offset2 : offsetSaved; rep[1] = rep_offset2 ? rep_offset2 : offsetSaved2;
/* Return the last literals size */ /* Return the last literals size */
return (size_t)(iend - anchor); return (size_t)(iend - anchor);
@ -267,7 +360,7 @@ ZSTD_compressBlock_fast_noDict_generic(
match0 = base + idx; match0 = base + idx;
rep_offset2 = rep_offset1; rep_offset2 = rep_offset1;
rep_offset1 = (U32)(ip0-match0); rep_offset1 = (U32)(ip0-match0);
offcode = STORE_OFFSET(rep_offset1); offcode = OFFSET_TO_OFFBASE(rep_offset1);
mLength = 4; mLength = 4;
/* Count the backwards match length. */ /* Count the backwards match length. */
@ -287,11 +380,6 @@ ZSTD_compressBlock_fast_noDict_generic(
ip0 += mLength; ip0 += mLength;
anchor = ip0; anchor = ip0;
/* write next hash table entry */
if (ip1 < ip0) {
hashTable[hash1] = (U32)(ip1 - base);
}
/* Fill table and check for immediate repcode. */ /* Fill table and check for immediate repcode. */
if (ip0 <= ilimit) { if (ip0 <= ilimit) {
/* Fill Table */ /* Fill Table */
@ -306,7 +394,7 @@ ZSTD_compressBlock_fast_noDict_generic(
{ U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */ { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
ip0 += rLength; ip0 += rLength;
ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength); ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
anchor = ip0; anchor = ip0;
continue; /* faster when present (confirmed on gcc-8) ... (?) */ continue; /* faster when present (confirmed on gcc-8) ... (?) */
} } } } } }
@ -380,14 +468,14 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
U32 const stepSize = cParams->targetLength + !(cParams->targetLength); U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
const BYTE* const base = ms->window.base; const BYTE* const base = ms->window.base;
const BYTE* const istart = (const BYTE*)src; const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart; const BYTE* ip0 = istart;
const BYTE* ip1 = ip0 + stepSize; /* we assert below that stepSize >= 1 */
const BYTE* anchor = istart; const BYTE* anchor = istart;
const U32 prefixStartIndex = ms->window.dictLimit; const U32 prefixStartIndex = ms->window.dictLimit;
const BYTE* const prefixStart = base + prefixStartIndex; const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const iend = istart + srcSize; const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE; const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1]; U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;
const ZSTD_matchState_t* const dms = ms->dictMatchState; const ZSTD_matchState_t* const dms = ms->dictMatchState;
const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
@ -397,13 +485,13 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const BYTE* const dictStart = dictBase + dictStartIndex; const BYTE* const dictStart = dictBase + dictStartIndex;
const BYTE* const dictEnd = dms->window.nextSrc; const BYTE* const dictEnd = dms->window.nextSrc;
const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase);
const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); const U32 dictAndPrefixLength = (U32)(istart - prefixStart + dictEnd - dictStart);
const U32 dictHLog = dictCParams->hashLog; const U32 dictHBits = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
/* if a dictionary is still attached, it necessarily means that /* if a dictionary is still attached, it necessarily means that
* it is within window size. So we just check it. */ * it is within window size. So we just check it. */
const U32 maxDistance = 1U << cParams->windowLog; const U32 maxDistance = 1U << cParams->windowLog;
const U32 endIndex = (U32)((size_t)(ip - base) + srcSize); const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
assert(endIndex - prefixStartIndex <= maxDistance); assert(endIndex - prefixStartIndex <= maxDistance);
(void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
@ -413,106 +501,155 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
* when translating a dict index into a local index */ * when translating a dict index into a local index */
assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
if (ms->prefetchCDictTables) {
size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
PREFETCH_AREA(dictHashTable, hashTableBytes)
}
/* init */ /* init */
DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
ip += (dictAndPrefixLength == 0); ip0 += (dictAndPrefixLength == 0);
/* dictMatchState repCode checks don't currently handle repCode == 0 /* dictMatchState repCode checks don't currently handle repCode == 0
* disabling. */ * disabling. */
assert(offset_1 <= dictAndPrefixLength); assert(offset_1 <= dictAndPrefixLength);
assert(offset_2 <= dictAndPrefixLength); assert(offset_2 <= dictAndPrefixLength);
/* Main Search Loop */ /* Outer search loop */
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ assert(stepSize >= 1);
while (ip1 <= ilimit) { /* repcode check at (ip0 + 1) is safe because ip0 < ip1 */
size_t mLength; size_t mLength;
size_t const h = ZSTD_hashPtr(ip, hlog, mls); size_t hash0 = ZSTD_hashPtr(ip0, hlog, mls);
U32 const curr = (U32)(ip-base);
U32 const matchIndex = hashTable[h];
const BYTE* match = base + matchIndex;
const U32 repIndex = curr + 1 - offset_1;
const BYTE* repMatch = (repIndex < prefixStartIndex) ?
dictBase + (repIndex - dictIndexDelta) :
base + repIndex;
hashTable[h] = curr; /* update hash table */
if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ size_t const dictHashAndTag0 = ZSTD_hashPtr(ip0, dictHBits, mls);
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { U32 dictMatchIndexAndTag = dictHashTable[dictHashAndTag0 >> ZSTD_SHORT_CACHE_TAG_BITS];
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; int dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag0);
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++; U32 matchIndex = hashTable[hash0];
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength); U32 curr = (U32)(ip0 - base);
} else if ( (matchIndex <= prefixStartIndex) ) { size_t step = stepSize;
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); const size_t kStepIncr = 1 << kSearchStrength;
U32 const dictMatchIndex = dictHashTable[dictHash]; const BYTE* nextStep = ip0 + kStepIncr;
const BYTE* dictMatch = dictBase + dictMatchIndex;
if (dictMatchIndex <= dictStartIndex || /* Inner search loop */
MEM_read32(dictMatch) != MEM_read32(ip)) { while (1) {
assert(stepSize >= 1); const BYTE* match = base + matchIndex;
ip += ((ip-anchor) >> kSearchStrength) + stepSize; const U32 repIndex = curr + 1 - offset_1;
continue; const BYTE* repMatch = (repIndex < prefixStartIndex) ?
} else { dictBase + (repIndex - dictIndexDelta) :
/* found a dict match */ base + repIndex;
U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta); const size_t hash1 = ZSTD_hashPtr(ip1, hlog, mls);
mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; size_t const dictHashAndTag1 = ZSTD_hashPtr(ip1, dictHBits, mls);
while (((ip>anchor) & (dictMatch>dictStart)) hashTable[hash0] = curr; /* update hash table */
&& (ip[-1] == dictMatch[-1])) {
ip--; dictMatch--; mLength++; if (((U32) ((prefixStartIndex - 1) - repIndex) >=
3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
&& (MEM_read32(repMatch) == MEM_read32(ip0 + 1))) {
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip0 + 1 + 4, repMatch + 4, iend, repMatchEnd, prefixStart) + 4;
ip0++;
ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
break;
}
if (dictTagsMatch) {
/* Found a possible dict match */
const U32 dictMatchIndex = dictMatchIndexAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
const BYTE* dictMatch = dictBase + dictMatchIndex;
if (dictMatchIndex > dictStartIndex &&
MEM_read32(dictMatch) == MEM_read32(ip0)) {
/* To replicate extDict parse behavior, we only use dict matches when the normal matchIndex is invalid */
if (matchIndex <= prefixStartIndex) {
U32 const offset = (U32) (curr - dictMatchIndex - dictIndexDelta);
mLength = ZSTD_count_2segments(ip0 + 4, dictMatch + 4, iend, dictEnd, prefixStart) + 4;
while (((ip0 > anchor) & (dictMatch > dictStart))
&& (ip0[-1] == dictMatch[-1])) {
ip0--;
dictMatch--;
mLength++;
} /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
break;
}
}
}
if (matchIndex > prefixStartIndex && MEM_read32(match) == MEM_read32(ip0)) {
/* found a regular match */
U32 const offset = (U32) (ip0 - match);
mLength = ZSTD_count(ip0 + 4, match + 4, iend) + 4;
while (((ip0 > anchor) & (match > prefixStart))
&& (ip0[-1] == match[-1])) {
ip0--;
match--;
mLength++;
} /* catch up */ } /* catch up */
offset_2 = offset_1; offset_2 = offset_1;
offset_1 = offset; offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
break;
} }
} else if (MEM_read32(match) != MEM_read32(ip)) {
/* it's not a match, and we're not going to check the dictionary */ /* Prepare for next iteration */
assert(stepSize >= 1); dictMatchIndexAndTag = dictHashTable[dictHashAndTag1 >> ZSTD_SHORT_CACHE_TAG_BITS];
ip += ((ip-anchor) >> kSearchStrength) + stepSize; dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag1);
continue; matchIndex = hashTable[hash1];
} else {
/* found a regular match */ if (ip1 >= nextStep) {
U32 const offset = (U32)(ip-match); step++;
mLength = ZSTD_count(ip+4, match+4, iend) + 4; nextStep += kStepIncr;
while (((ip>anchor) & (match>prefixStart)) }
&& (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ ip0 = ip1;
offset_2 = offset_1; ip1 = ip1 + step;
offset_1 = offset; if (ip1 > ilimit) goto _cleanup;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
} curr = (U32)(ip0 - base);
hash0 = hash1;
} /* end inner search loop */
/* match found */ /* match found */
ip += mLength; assert(mLength);
anchor = ip; ip0 += mLength;
anchor = ip0;
if (ip <= ilimit) { if (ip0 <= ilimit) {
/* Fill Table */ /* Fill Table */
assert(base+curr+2 > istart); /* check base overflow */ assert(base+curr+2 > istart); /* check base overflow */
hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */ hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
/* check immediate repcode */ /* check immediate repcode */
while (ip <= ilimit) { while (ip0 <= ilimit) {
U32 const current2 = (U32)(ip-base); U32 const current2 = (U32)(ip0-base);
U32 const repIndex2 = current2 - offset_2; U32 const repIndex2 = current2 - offset_2;
const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
dictBase - dictIndexDelta + repIndex2 : dictBase - dictIndexDelta + repIndex2 :
base + repIndex2; base + repIndex2;
if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) { && (MEM_read32(repMatch2) == MEM_read32(ip0))) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2); ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = current2;
ip += repLength2; ip0 += repLength2;
anchor = ip; anchor = ip0;
continue; continue;
} }
break; break;
} }
} }
/* Prepare for next iteration */
assert(ip0 == anchor);
ip1 = ip0 + stepSize;
} }
_cleanup:
/* save reps for next block */ /* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved; rep[0] = offset_1;
rep[1] = offset_2 ? offset_2 : offsetSaved; rep[1] = offset_2;
/* Return the last literals size */ /* Return the last literals size */
return (size_t)(iend - anchor); return (size_t)(iend - anchor);
@ -553,11 +690,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
U32* const hashTable = ms->hashTable; U32* const hashTable = ms->hashTable;
U32 const hlog = cParams->hashLog; U32 const hlog = cParams->hashLog;
/* support stepSize of 0 */ /* support stepSize of 0 */
U32 const stepSize = cParams->targetLength + !(cParams->targetLength); size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
const BYTE* const base = ms->window.base; const BYTE* const base = ms->window.base;
const BYTE* const dictBase = ms->window.dictBase; const BYTE* const dictBase = ms->window.dictBase;
const BYTE* const istart = (const BYTE*)src; const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart; const BYTE* anchor = istart;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
@ -570,6 +706,28 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* const iend = istart + srcSize; const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8; const BYTE* const ilimit = iend - 8;
U32 offset_1=rep[0], offset_2=rep[1]; U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved1 = 0, offsetSaved2 = 0;
const BYTE* ip0 = istart;
const BYTE* ip1;
const BYTE* ip2;
const BYTE* ip3;
U32 current0;
size_t hash0; /* hash for ip0 */
size_t hash1; /* hash for ip1 */
U32 idx; /* match idx for ip0 */
const BYTE* idxBase; /* base pointer for idx */
U32 offcode;
const BYTE* match0;
size_t mLength;
const BYTE* matchEnd = 0; /* initialize to avoid warning, assert != 0 later */
size_t step;
const BYTE* nextStep;
const size_t kStepIncr = (1 << (kSearchStrength - 1));
(void)hasStep; /* not currently specialized on whether it's accelerated */ (void)hasStep; /* not currently specialized on whether it's accelerated */
@ -579,75 +737,202 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
if (prefixStartIndex == dictStartIndex) if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize); return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);
/* Search Loop */ { U32 const curr = (U32)(ip0 - base);
while (ip < ilimit) { /* < instead of <=, because (ip+1) */ U32 const maxRep = curr - dictStartIndex;
const size_t h = ZSTD_hashPtr(ip, hlog, mls); if (offset_2 >= maxRep) offsetSaved2 = offset_2, offset_2 = 0;
const U32 matchIndex = hashTable[h]; if (offset_1 >= maxRep) offsetSaved1 = offset_1, offset_1 = 0;
const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; }
const BYTE* match = matchBase + matchIndex;
const U32 curr = (U32)(ip-base);
const U32 repIndex = curr + 1 - offset_1;
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
const BYTE* const repMatch = repBase + repIndex;
hashTable[h] = curr; /* update hash table */
DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ /* start each op */
& (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */ _start: /* Requires: ip0 */
&& (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; step = stepSize;
size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; nextStep = ip0 + kStepIncr;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength); /* calculate positions, ip0 - anchor == 0, so we skip step calc */
ip += rLength; ip1 = ip0 + 1;
anchor = ip; ip2 = ip0 + step;
} else { ip3 = ip2 + 1;
if ( (matchIndex < dictStartIndex) ||
(MEM_read32(match) != MEM_read32(ip)) ) { if (ip3 >= ilimit) {
assert(stepSize >= 1); goto _cleanup;
ip += ((ip-anchor) >> kSearchStrength) + stepSize; }
continue;
hash0 = ZSTD_hashPtr(ip0, hlog, mls);
hash1 = ZSTD_hashPtr(ip1, hlog, mls);
idx = hashTable[hash0];
idxBase = idx < prefixStartIndex ? dictBase : base;
do {
{ /* load repcode match for ip[2] */
U32 const current2 = (U32)(ip2 - base);
U32 const repIndex = current2 - offset_1;
const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
U32 rval;
if ( ((U32)(prefixStartIndex - repIndex) >= 4) /* intentional underflow */
& (offset_1 > 0) ) {
rval = MEM_read32(repBase + repIndex);
} else {
rval = MEM_read32(ip2) ^ 1; /* guaranteed to not match. */
} }
{ const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; /* write back hash table entry */
U32 const offset = curr - matchIndex; current0 = (U32)(ip0 - base);
size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; hashTable[hash0] = current0;
while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset_2 = offset_1; offset_1 = offset; /* update offset history */ /* check repcode at ip[2] */
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength); if (MEM_read32(ip2) == rval) {
ip += mLength; ip0 = ip2;
anchor = ip; match0 = repBase + repIndex;
matchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
assert((match0 != prefixStart) & (match0 != dictStart));
mLength = ip0[-1] == match0[-1];
ip0 -= mLength;
match0 -= mLength;
offcode = REPCODE1_TO_OFFBASE;
mLength += 4;
goto _match;
} } } }
if (ip <= ilimit) { { /* load match for ip[0] */
/* Fill Table */ U32 const mval = idx >= dictStartIndex ?
hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; MEM_read32(idxBase + idx) :
hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); MEM_read32(ip0) ^ 1; /* guaranteed not to match */
/* check immediate repcode */
while (ip <= ilimit) { /* check match at ip[0] */
U32 const current2 = (U32)(ip-base); if (MEM_read32(ip0) == mval) {
U32 const repIndex2 = current2 - offset_2; /* found a match! */
const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; goto _offset;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex)) /* intentional overflow */ } }
&& (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; /* lookup ip[1] */
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; idx = hashTable[hash1];
{ U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ idxBase = idx < prefixStartIndex ? dictBase : base;
ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2);
hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; /* hash ip[2] */
ip += repLength2; hash0 = hash1;
anchor = ip; hash1 = ZSTD_hashPtr(ip2, hlog, mls);
continue;
} /* advance to next positions */
break; ip0 = ip1;
} } } ip1 = ip2;
ip2 = ip3;
/* write back hash table entry */
current0 = (U32)(ip0 - base);
hashTable[hash0] = current0;
{ /* load match for ip[0] */
U32 const mval = idx >= dictStartIndex ?
MEM_read32(idxBase + idx) :
MEM_read32(ip0) ^ 1; /* guaranteed not to match */
/* check match at ip[0] */
if (MEM_read32(ip0) == mval) {
/* found a match! */
goto _offset;
} }
/* lookup ip[1] */
idx = hashTable[hash1];
idxBase = idx < prefixStartIndex ? dictBase : base;
/* hash ip[2] */
hash0 = hash1;
hash1 = ZSTD_hashPtr(ip2, hlog, mls);
/* advance to next positions */
ip0 = ip1;
ip1 = ip2;
ip2 = ip0 + step;
ip3 = ip1 + step;
/* calculate step */
if (ip2 >= nextStep) {
step++;
PREFETCH_L1(ip1 + 64);
PREFETCH_L1(ip1 + 128);
nextStep += kStepIncr;
}
} while (ip3 < ilimit);
_cleanup:
/* Note that there are probably still a couple positions we could search.
* However, it seems to be a meaningful performance hit to try to search
* them. So let's not. */
/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
* rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict_generic for more context. */
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
/* save reps for next block */ /* save reps for next block */
rep[0] = offset_1; rep[0] = offset_1 ? offset_1 : offsetSaved1;
rep[1] = offset_2; rep[1] = offset_2 ? offset_2 : offsetSaved2;
/* Return the last literals size */ /* Return the last literals size */
return (size_t)(iend - anchor); return (size_t)(iend - anchor);
_offset: /* Requires: ip0, idx, idxBase */
/* Compute the offset code. */
{ U32 const offset = current0 - idx;
const BYTE* const lowMatchPtr = idx < prefixStartIndex ? dictStart : prefixStart;
matchEnd = idx < prefixStartIndex ? dictEnd : iend;
match0 = idxBase + idx;
offset_2 = offset_1;
offset_1 = offset;
offcode = OFFSET_TO_OFFBASE(offset);
mLength = 4;
/* Count the backwards match length. */
while (((ip0>anchor) & (match0>lowMatchPtr)) && (ip0[-1] == match0[-1])) {
ip0--;
match0--;
mLength++;
} }
_match: /* Requires: ip0, match0, offcode, matchEnd */
/* Count the forward length. */
assert(matchEnd != 0);
mLength += ZSTD_count_2segments(ip0 + mLength, match0 + mLength, iend, matchEnd, prefixStart);
ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength);
ip0 += mLength;
anchor = ip0;
/* write next hash table entry */
if (ip1 < ip0) {
hashTable[hash1] = (U32)(ip1 - base);
}
/* Fill table and check for immediate repcode. */
if (ip0 <= ilimit) {
/* Fill Table */
assert(base+current0+2 > istart); /* check base overflow */
hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
while (ip0 <= ilimit) {
U32 const repIndex2 = (U32)(ip0-base) - offset_2;
const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 > 0)) /* intentional underflow */
&& (MEM_read32(repMatch2) == MEM_read32(ip0)) ) {
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
{ U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
ip0 += repLength2;
anchor = ip0;
continue;
}
break;
} }
goto _start;
} }
ZSTD_GEN_FAST_FN(extDict, 4, 0) ZSTD_GEN_FAST_FN(extDict, 4, 0)
@ -660,6 +945,7 @@ size_t ZSTD_compressBlock_fast_extDict(
void const* src, size_t srcSize) void const* src, size_t srcSize)
{ {
U32 const mls = ms->cParams.minMatch; U32 const mls = ms->cParams.minMatch;
assert(ms->dictMatchState == NULL);
switch(mls) switch(mls)
{ {
default: /* includes case 3 */ default: /* includes case 3 */

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -16,7 +17,8 @@
#include "zstd_compress_internal.h" #include "zstd_compress_internal.h"
void ZSTD_fillHashTable(ZSTD_matchState_t* ms, void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm); void const* end, ZSTD_dictTableLoadMethod_e dtlm,
ZSTD_tableFillPurpose_e tfp);
size_t ZSTD_compressBlock_fast( size_t ZSTD_compressBlock_fast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -22,6 +23,8 @@
*/ */
#define ZSTD_LAZY_DDSS_BUCKET_LOG 2 #define ZSTD_LAZY_DDSS_BUCKET_LOG 2
#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip); void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
@ -113,7 +116,7 @@ size_t ZSTD_compressBlock_lazy2_extDict_row(
size_t ZSTD_compressBlock_btlazy2_extDict( size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize); void const* src, size_t srcSize);
#endif /* ZSTD_LAZY_H */ #endif /* ZSTD_LAZY_H */

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -242,11 +243,11 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
switch(ms->cParams.strategy) switch(ms->cParams.strategy)
{ {
case ZSTD_fast: case ZSTD_fast:
ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast); ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
break; break;
case ZSTD_dfast: case ZSTD_dfast:
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast); ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
break; break;
case ZSTD_greedy: case ZSTD_greedy:
@ -549,7 +550,7 @@ size_t ZSTD_ldm_generateSequences(
* the window through early invalidation. * the window through early invalidation.
* TODO: * Test the chunk size. * TODO: * Test the chunk size.
* * Try invalidation after the sequence generation and test the * * Try invalidation after the sequence generation and test the
* the offset against maxDist directly. * offset against maxDist directly.
* *
* NOTE: Because of dictionaries + sequence splitting we MUST make sure * NOTE: Because of dictionaries + sequence splitting we MUST make sure
* that any offset used is valid at the END of the sequence, since it may * that any offset used is valid at the END of the sequence, since it may
@ -711,7 +712,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
rep[0] = sequence.offset; rep[0] = sequence.offset;
/* Store the sequence */ /* Store the sequence */
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
STORE_OFFSET(sequence.offset), OFFSET_TO_OFFBASE(sequence.offset),
sequence.matchLength); sequence.matchLength);
ip += sequence.matchLength; ip += sequence.matchLength;
} }

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -16,7 +17,7 @@
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
#define ZSTD_MAX_PRICE (1<<30) #define ZSTD_MAX_PRICE (1<<30)
#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ #define ZSTD_PREDEF_THRESHOLD 8 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
/*-************************************* /*-*************************************
@ -26,27 +27,35 @@
#if 0 /* approximation at bit level (for tests) */ #if 0 /* approximation at bit level (for tests) */
# define BITCOST_ACCURACY 0 # define BITCOST_ACCURACY 0
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat)) # define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
#elif 0 /* fractional bit accuracy (for tests) */ #elif 0 /* fractional bit accuracy (for tests) */
# define BITCOST_ACCURACY 8 # define BITCOST_ACCURACY 8
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat)) # define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
#else /* opt==approx, ultra==accurate */ #else /* opt==approx, ultra==accurate */
# define BITCOST_ACCURACY 8 # define BITCOST_ACCURACY 8
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat)) # define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
#endif #endif
/* ZSTD_bitWeight() :
* provide estimated "cost" of a stat in full bits only */
MEM_STATIC U32 ZSTD_bitWeight(U32 stat) MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
{ {
return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER); return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
} }
/* ZSTD_fracWeight() :
* provide fractional-bit "cost" of a stat,
* using linear interpolation approximation */
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat) MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
{ {
U32 const stat = rawStat + 1; U32 const stat = rawStat + 1;
U32 const hb = ZSTD_highbit32(stat); U32 const hb = ZSTD_highbit32(stat);
U32 const BWeight = hb * BITCOST_MULTIPLIER; U32 const BWeight = hb * BITCOST_MULTIPLIER;
/* Fweight was meant for "Fractional weight"
* but it's effectively a value between 1 and 2
* using fixed point arithmetic */
U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb; U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
U32 const weight = BWeight + FWeight; U32 const weight = BWeight + FWeight;
assert(hb + BITCOST_ACCURACY < 31); assert(hb + BITCOST_ACCURACY < 31);
@ -57,7 +66,7 @@ MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
/* debugging function, /* debugging function,
* @return price in bytes as fractional value * @return price in bytes as fractional value
* for debug messages only */ * for debug messages only */
MEM_STATIC double ZSTD_fCost(U32 price) MEM_STATIC double ZSTD_fCost(int price)
{ {
return (double)price / (BITCOST_MULTIPLIER*8); return (double)price / (BITCOST_MULTIPLIER*8);
} }
@ -88,20 +97,26 @@ static U32 sum_u32(const unsigned table[], size_t nbElts)
return total; return total;
} }
static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift) typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
static U32
ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
{ {
U32 s, sum=0; U32 s, sum=0;
DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift); DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
(unsigned)lastEltIndex+1, (unsigned)shift );
assert(shift < 30); assert(shift < 30);
for (s=0; s<lastEltIndex+1; s++) { for (s=0; s<lastEltIndex+1; s++) {
table[s] = 1 + (table[s] >> shift); unsigned const base = base1 ? 1 : (table[s]>0);
sum += table[s]; unsigned const newStat = base + (table[s] >> shift);
sum += newStat;
table[s] = newStat;
} }
return sum; return sum;
} }
/* ZSTD_scaleStats() : /* ZSTD_scaleStats() :
* reduce all elements in table is sum too large * reduce all elt frequencies in table if sum too large
* return the resulting sum of elements */ * return the resulting sum of elements */
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget) static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
{ {
@ -110,7 +125,7 @@ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget); DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
assert(logTarget < 30); assert(logTarget < 30);
if (factor <= 1) return prevsum; if (factor <= 1) return prevsum;
return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor)); return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
} }
/* ZSTD_rescaleFreqs() : /* ZSTD_rescaleFreqs() :
@ -129,18 +144,22 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
optPtr->priceType = zop_dynamic; optPtr->priceType = zop_dynamic;
if (optPtr->litLengthSum == 0) { /* first block : init */ if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */
if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */
DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef"); /* heuristic: use pre-defined stats for too small inputs */
if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
optPtr->priceType = zop_predef; optPtr->priceType = zop_predef;
} }
assert(optPtr->symbolCosts != NULL); assert(optPtr->symbolCosts != NULL);
if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
/* huffman table presumed generated by dictionary */
/* huffman stats covering the full value set : table presumed generated by dictionary */
optPtr->priceType = zop_dynamic; optPtr->priceType = zop_dynamic;
if (compressedLiterals) { if (compressedLiterals) {
/* generate literals statistics from huffman table */
unsigned lit; unsigned lit;
assert(optPtr->litFreq != NULL); assert(optPtr->litFreq != NULL);
optPtr->litSum = 0; optPtr->litSum = 0;
@ -188,13 +207,14 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
optPtr->offCodeSum += optPtr->offCodeFreq[of]; optPtr->offCodeSum += optPtr->offCodeFreq[of];
} } } }
} else { /* not a dictionary */ } else { /* first block, no dictionary */
assert(optPtr->litFreq != NULL); assert(optPtr->litFreq != NULL);
if (compressedLiterals) { if (compressedLiterals) {
/* base initial cost of literals on direct frequency within src */
unsigned lit = MaxLit; unsigned lit = MaxLit;
HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8); optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
} }
{ unsigned const baseLLfreqs[MaxLL+1] = { { unsigned const baseLLfreqs[MaxLL+1] = {
@ -224,10 +244,9 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1); optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
} }
} }
} else { /* new block : re-use previous statistics, scaled down */ } else { /* new block : scale down accumulated statistics */
if (compressedLiterals) if (compressedLiterals)
optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12); optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
@ -255,11 +274,14 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
/* dynamic statistics */ /* dynamic statistics */
{ U32 price = litLength * optPtr->litSumBasePrice; { U32 price = optPtr->litSumBasePrice * litLength;
U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
U32 u; U32 u;
assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
for (u=0; u < litLength; u++) { for (u=0; u < litLength; u++) {
assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */ U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel); if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
price -= litPrice;
} }
return price; return price;
} }
@ -272,10 +294,11 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
assert(litLength <= ZSTD_BLOCKSIZE_MAX); assert(litLength <= ZSTD_BLOCKSIZE_MAX);
if (optPtr->priceType == zop_predef) if (optPtr->priceType == zop_predef)
return WEIGHT(litLength, optLevel); return WEIGHT(litLength, optLevel);
/* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
* because it isn't representable in the zstd format. So instead just /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
* call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block * because it isn't representable in the zstd format.
* would be all literals. * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
* In such a case, the block would be all literals.
*/ */
if (litLength == ZSTD_BLOCKSIZE_MAX) if (litLength == ZSTD_BLOCKSIZE_MAX)
return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel); return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
@ -289,24 +312,25 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
} }
/* ZSTD_getMatchPrice() : /* ZSTD_getMatchPrice() :
* Provides the cost of the match part (offset + matchLength) of a sequence * Provides the cost of the match part (offset + matchLength) of a sequence.
* Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
* @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2 * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
* @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
*/ */
FORCE_INLINE_TEMPLATE U32 FORCE_INLINE_TEMPLATE U32
ZSTD_getMatchPrice(U32 const offcode, ZSTD_getMatchPrice(U32 const offBase,
U32 const matchLength, U32 const matchLength,
const optState_t* const optPtr, const optState_t* const optPtr,
int const optLevel) int const optLevel)
{ {
U32 price; U32 price;
U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode)); U32 const offCode = ZSTD_highbit32(offBase);
U32 const mlBase = matchLength - MINMATCH; U32 const mlBase = matchLength - MINMATCH;
assert(matchLength >= MINMATCH); assert(matchLength >= MINMATCH);
if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */
return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER); return WEIGHT(mlBase, optLevel)
+ ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
/* dynamic statistics */ /* dynamic statistics */
price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel)); price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
@ -325,10 +349,10 @@ ZSTD_getMatchPrice(U32 const offcode,
} }
/* ZSTD_updateStats() : /* ZSTD_updateStats() :
* assumption : literals + litLengtn <= iend */ * assumption : literals + litLength <= iend */
static void ZSTD_updateStats(optState_t* const optPtr, static void ZSTD_updateStats(optState_t* const optPtr,
U32 litLength, const BYTE* literals, U32 litLength, const BYTE* literals,
U32 offsetCode, U32 matchLength) U32 offBase, U32 matchLength)
{ {
/* literals */ /* literals */
if (ZSTD_compressedLiterals(optPtr)) { if (ZSTD_compressedLiterals(optPtr)) {
@ -344,8 +368,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
optPtr->litLengthSum++; optPtr->litLengthSum++;
} }
/* offset code : expected to follow storeSeq() numeric representation */ /* offset code : follows storeSeq() numeric representation */
{ U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode)); { U32 const offCode = ZSTD_highbit32(offBase);
assert(offCode <= MaxOff); assert(offCode <= MaxOff);
optPtr->offCodeFreq[offCode]++; optPtr->offCodeFreq[offCode]++;
optPtr->offCodeSum++; optPtr->offCodeSum++;
@ -552,16 +576,17 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict); ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
} }
FORCE_INLINE_TEMPLATE FORCE_INLINE_TEMPLATE U32
U32 ZSTD_insertBtAndGetAllMatches ( ZSTD_insertBtAndGetAllMatches (
ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
ZSTD_matchState_t* ms, ZSTD_matchState_t* ms,
U32* nextToUpdate3, U32* nextToUpdate3,
const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, const BYTE* const ip, const BYTE* const iLimit,
const U32 rep[ZSTD_REP_NUM], const ZSTD_dictMode_e dictMode,
U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ const U32 rep[ZSTD_REP_NUM],
const U32 lengthToBeat, const U32 ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
U32 const mls /* template */) const U32 lengthToBeat,
const U32 mls /* template */)
{ {
const ZSTD_compressionParameters* const cParams = &ms->cParams; const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
@ -644,7 +669,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
repCode, ll0, repOffset, repLen); repCode, ll0, repOffset, repLen);
bestLength = repLen; bestLength = repLen;
matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1); /* expect value between 1 and 3 */ matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1); /* expect value between 1 and 3 */
matches[mnum].len = (U32)repLen; matches[mnum].len = (U32)repLen;
mnum++; mnum++;
if ( (repLen > sufficient_len) if ( (repLen > sufficient_len)
@ -673,7 +698,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
bestLength = mlen; bestLength = mlen;
assert(curr > matchIndex3); assert(curr > matchIndex3);
assert(mnum==0); /* no prior solution */ assert(mnum==0); /* no prior solution */
matches[0].off = STORE_OFFSET(curr - matchIndex3); matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
matches[0].len = (U32)mlen; matches[0].len = (U32)mlen;
mnum = 1; mnum = 1;
if ( (mlen > sufficient_len) | if ( (mlen > sufficient_len) |
@ -706,13 +731,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
} }
if (matchLength > bestLength) { if (matchLength > bestLength) {
DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)", DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
(U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex)); (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
assert(matchEndIdx > matchIndex); assert(matchEndIdx > matchIndex);
if (matchLength > matchEndIdx - matchIndex) if (matchLength > matchEndIdx - matchIndex)
matchEndIdx = matchIndex + (U32)matchLength; matchEndIdx = matchIndex + (U32)matchLength;
bestLength = matchLength; bestLength = matchLength;
matches[mnum].off = STORE_OFFSET(curr - matchIndex); matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
matches[mnum].len = (U32)matchLength; matches[mnum].len = (U32)matchLength;
mnum++; mnum++;
if ( (matchLength > ZSTD_OPT_NUM) if ( (matchLength > ZSTD_OPT_NUM)
@ -754,12 +779,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
if (matchLength > bestLength) { if (matchLength > bestLength) {
matchIndex = dictMatchIndex + dmsIndexDelta; matchIndex = dictMatchIndex + dmsIndexDelta;
DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)", DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
(U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex)); (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
if (matchLength > matchEndIdx - matchIndex) if (matchLength > matchEndIdx - matchIndex)
matchEndIdx = matchIndex + (U32)matchLength; matchEndIdx = matchIndex + (U32)matchLength;
bestLength = matchLength; bestLength = matchLength;
matches[mnum].off = STORE_OFFSET(curr - matchIndex); matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
matches[mnum].len = (U32)matchLength; matches[mnum].len = (U32)matchLength;
mnum++; mnum++;
if ( (matchLength > ZSTD_OPT_NUM) if ( (matchLength > ZSTD_OPT_NUM)
@ -960,7 +985,7 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
const ZSTD_optLdm_t* optLdm, U32 currPosInBlock) const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
{ {
U32 const posDiff = currPosInBlock - optLdm->startPosInBlock; U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
/* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */ /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff; U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
/* Ensure that current block position is not outside of the match */ /* Ensure that current block position is not outside of the match */
@ -971,11 +996,11 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
} }
if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) { if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
U32 const candidateOffCode = STORE_OFFSET(optLdm->offset); U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u", DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
candidateOffCode, candidateMatchLength, currPosInBlock); candidateOffBase, candidateMatchLength, currPosInBlock);
matches[*nbMatches].len = candidateMatchLength; matches[*nbMatches].len = candidateMatchLength;
matches[*nbMatches].off = candidateOffCode; matches[*nbMatches].off = candidateOffBase;
(*nbMatches)++; (*nbMatches)++;
} }
} }
@ -1062,6 +1087,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
ZSTD_optimal_t lastSequence; ZSTD_optimal_t lastSequence;
ZSTD_optLdm_t optLdm; ZSTD_optLdm_t optLdm;
ZSTD_memset(&lastSequence, 0, sizeof(ZSTD_optimal_t));
optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore; optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0; optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip)); ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
@ -1098,14 +1125,14 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
/* large match -> immediate encoding */ /* large match -> immediate encoding */
{ U32 const maxML = matches[nbMatches-1].len; { U32 const maxML = matches[nbMatches-1].len;
U32 const maxOffcode = matches[nbMatches-1].off; U32 const maxOffBase = matches[nbMatches-1].off;
DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series", DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart)); nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
if (maxML > sufficient_len) { if (maxML > sufficient_len) {
lastSequence.litlen = litlen; lastSequence.litlen = litlen;
lastSequence.mlen = maxML; lastSequence.mlen = maxML;
lastSequence.off = maxOffcode; lastSequence.off = maxOffBase;
DEBUGLOG(6, "large match (%u>%u), immediate encoding", DEBUGLOG(6, "large match (%u>%u), immediate encoding",
maxML, sufficient_len); maxML, sufficient_len);
cur = 0; cur = 0;
@ -1122,15 +1149,15 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */ opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
} }
for (matchNb = 0; matchNb < nbMatches; matchNb++) { for (matchNb = 0; matchNb < nbMatches; matchNb++) {
U32 const offcode = matches[matchNb].off; U32 const offBase = matches[matchNb].off;
U32 const end = matches[matchNb].len; U32 const end = matches[matchNb].len;
for ( ; pos <= end ; pos++ ) { for ( ; pos <= end ; pos++ ) {
U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel); U32 const matchPrice = ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
U32 const sequencePrice = literalsPrice + matchPrice; U32 const sequencePrice = literalsPrice + matchPrice;
DEBUGLOG(7, "rPos:%u => set initial price : %.2f", DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
pos, ZSTD_fCost(sequencePrice)); pos, ZSTD_fCost((int)sequencePrice));
opt[pos].mlen = pos; opt[pos].mlen = pos;
opt[pos].off = offcode; opt[pos].off = offBase;
opt[pos].litlen = litlen; opt[pos].litlen = litlen;
opt[pos].price = (int)sequencePrice; opt[pos].price = (int)sequencePrice;
} } } }
@ -1230,7 +1257,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch; U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
U32 mlen; U32 mlen;
DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u", DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
matchNb, matches[matchNb].off, lastML, litlen); matchNb, matches[matchNb].off, lastML, litlen);
for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
@ -1296,7 +1323,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
for (storePos=storeStart; storePos <= storeEnd; storePos++) { for (storePos=storeStart; storePos <= storeEnd; storePos++) {
U32 const llen = opt[storePos].litlen; U32 const llen = opt[storePos].litlen;
U32 const mlen = opt[storePos].mlen; U32 const mlen = opt[storePos].mlen;
U32 const offCode = opt[storePos].off; U32 const offBase = opt[storePos].off;
U32 const advance = llen + mlen; U32 const advance = llen + mlen;
DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u", DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
anchor - istart, (unsigned)llen, (unsigned)mlen); anchor - istart, (unsigned)llen, (unsigned)mlen);
@ -1308,8 +1335,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
} }
assert(anchor + llen <= iend); assert(anchor + llen <= iend);
ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen); ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
anchor += advance; anchor += advance;
ip = anchor; ip = anchor;
} } } }
@ -1349,7 +1376,7 @@ size_t ZSTD_compressBlock_btopt(
/* ZSTD_initStats_ultra(): /* ZSTD_initStats_ultra():
* make a first compression pass, just to seed stats with more accurate starting values. * make a first compression pass, just to seed stats with more accurate starting values.
* only works on first block, with no dictionary and no ldm. * only works on first block, with no dictionary and no ldm.
* this function cannot error, hence its contract must be respected. * this function cannot error out, its narrow contract must be respected.
*/ */
static void static void
ZSTD_initStats_ultra(ZSTD_matchState_t* ms, ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
@ -1368,7 +1395,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/ ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
/* invalidate first scan from history */ /* invalidate first scan from history, only keep entropy stats */
ZSTD_resetSeqStore(seqStore); ZSTD_resetSeqStore(seqStore);
ms->window.base -= srcSize; ms->window.base -= srcSize;
ms->window.dictLimit += (U32)srcSize; ms->window.dictLimit += (U32)srcSize;
@ -1392,20 +1419,20 @@ size_t ZSTD_compressBlock_btultra2(
U32 const curr = (U32)((const BYTE*)src - ms->window.base); U32 const curr = (U32)((const BYTE*)src - ms->window.base);
DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
/* 2-pass strategy: /* 2-passes strategy:
* this strategy makes a first pass over first block to collect statistics * this strategy makes a first pass over first block to collect statistics
* and seed next round's statistics with it. * in order to seed next round's statistics with it.
* After 1st pass, function forgets everything, and starts a new block. * After 1st pass, function forgets history, and starts a new block.
* Consequently, this can only work if no data has been previously loaded in tables, * Consequently, this can only work if no data has been previously loaded in tables,
* aka, no dictionary, no prefix, no ldm preprocessing. * aka, no dictionary, no prefix, no ldm preprocessing.
* The compression ratio gain is generally small (~0.5% on first block), * The compression ratio gain is generally small (~0.5% on first block),
* the cost is 2x cpu time on first block. */ ** the cost is 2x cpu time on first block. */
assert(srcSize <= ZSTD_BLOCKSIZE_MAX); assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
if ( (ms->opt.litLengthSum==0) /* first block */ if ( (ms->opt.litLengthSum==0) /* first block */
&& (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
&& (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
&& (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
&& (srcSize > ZSTD_PREDEF_THRESHOLD) && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
) { ) {
ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
} }

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -14,12 +15,12 @@
/*-******************************************************* /*-*******************************************************
* Dependencies * Dependencies
*********************************************************/ *********************************************************/
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
#include "../common/cpu.h" /* bmi2 */ #include "../common/cpu.h" /* bmi2 */
#include "../common/mem.h" /* low level memory routines */ #include "../common/mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY #define FSE_STATIC_LINKING_ONLY
#include "../common/fse.h" #include "../common/fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h" #include "../common/huf.h"
#include "zstd_decompress_internal.h" #include "zstd_decompress_internal.h"
#include "zstd_ddict.h" #include "zstd_ddict.h"
@ -131,7 +132,7 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
ZSTD_memcpy(internalBuffer, dict, dictSize); ZSTD_memcpy(internalBuffer, dict, dictSize);
} }
ddict->dictSize = dictSize; ddict->dictSize = dictSize;
ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */
/* parse dictionary content */ /* parse dictionary content */
FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
@ -237,5 +238,5 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
{ {
if (ddict==NULL) return 0; if (ddict==NULL) return 0;
return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); return ddict->dictID;
} }

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -52,17 +53,18 @@
/*-******************************************************* /*-*******************************************************
* Dependencies * Dependencies
*********************************************************/ *********************************************************/
#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
#include "../common/mem.h" /* low level memory routines */ #include "../common/mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY #define FSE_STATIC_LINKING_ONLY
#include "../common/fse.h" #include "../common/fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h" #include "../common/huf.h"
#include <linux/xxhash.h> /* xxh64_reset, xxh64_update, xxh64_digest, XXH64 */ #include <linux/xxhash.h> /* xxh64_reset, xxh64_update, xxh64_digest, XXH64 */
#include "../common/zstd_internal.h" /* blockProperties_t */ #include "../common/zstd_internal.h" /* blockProperties_t */
#include "zstd_decompress_internal.h" /* ZSTD_DCtx */ #include "zstd_decompress_internal.h" /* ZSTD_DCtx */
#include "zstd_ddict.h" /* ZSTD_DDictDictContent */ #include "zstd_ddict.h" /* ZSTD_DDictDictContent */
#include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */ #include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */
#include "../common/bits.h" /* ZSTD_highbit32 */
@ -72,11 +74,11 @@
*************************************/ *************************************/
#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4 #define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4
#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float. #define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float.
* Currently, that means a 0.75 load factor. * Currently, that means a 0.75 load factor.
* So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded
* the load factor of the ddict hash set. * the load factor of the ddict hash set.
*/ */
#define DDICT_HASHSET_TABLE_BASE_SIZE 64 #define DDICT_HASHSET_TABLE_BASE_SIZE 64
#define DDICT_HASHSET_RESIZE_FACTOR 2 #define DDICT_HASHSET_RESIZE_FACTOR 2
@ -237,6 +239,7 @@ static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
dctx->outBufferMode = ZSTD_bm_buffered; dctx->outBufferMode = ZSTD_bm_buffered;
dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict; dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
dctx->disableHufAsm = 0;
} }
static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
@ -421,16 +424,40 @@ size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
* note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
* @return : 0, `zfhPtr` is correctly filled, * @return : 0, `zfhPtr` is correctly filled,
* >0, `srcSize` is too small, value is wanted `srcSize` amount, * >0, `srcSize` is too small, value is wanted `srcSize` amount,
* or an error code, which can be tested using ZSTD_isError() */ ** or an error code, which can be tested using ZSTD_isError() */
size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
{ {
const BYTE* ip = (const BYTE*)src; const BYTE* ip = (const BYTE*)src;
size_t const minInputSize = ZSTD_startingInputLength(format); size_t const minInputSize = ZSTD_startingInputLength(format);
ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ DEBUGLOG(5, "ZSTD_getFrameHeader_advanced: minInputSize = %zu, srcSize = %zu", minInputSize, srcSize);
if (srcSize < minInputSize) return minInputSize;
RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter");
if (srcSize > 0) {
/* note : technically could be considered an assert(), since it's an invalid entry */
RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter : src==NULL, but srcSize>0");
}
if (srcSize < minInputSize) {
if (srcSize > 0 && format != ZSTD_f_zstd1_magicless) {
/* when receiving less than @minInputSize bytes,
* control these bytes at least correspond to a supported magic number
* in order to error out early if they don't.
**/
size_t const toCopy = MIN(4, srcSize);
unsigned char hbuf[4]; MEM_writeLE32(hbuf, ZSTD_MAGICNUMBER);
assert(src != NULL);
ZSTD_memcpy(hbuf, src, toCopy);
if ( MEM_readLE32(hbuf) != ZSTD_MAGICNUMBER ) {
/* not a zstd frame : let's check if it's a skippable frame */
MEM_writeLE32(hbuf, ZSTD_MAGIC_SKIPPABLE_START);
ZSTD_memcpy(hbuf, src, toCopy);
if ((MEM_readLE32(hbuf) & ZSTD_MAGIC_SKIPPABLE_MASK) != ZSTD_MAGIC_SKIPPABLE_START) {
RETURN_ERROR(prefix_unknown,
"first bytes don't correspond to any supported magic number");
} } }
return minInputSize;
}
ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzers may not understand that zfhPtr will be read only if return value is zero, since they are 2 different signals */
if ( (format != ZSTD_f_zstd1_magicless) if ( (format != ZSTD_f_zstd1_magicless)
&& (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
@ -540,49 +567,52 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
frameParameter_unsupported, ""); frameParameter_unsupported, "");
{ { size_t const skippableSize = skippableHeaderSize + sizeU32;
size_t const skippableSize = skippableHeaderSize + sizeU32;
RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, ""); RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
return skippableSize; return skippableSize;
} }
} }
/*! ZSTD_readSkippableFrame() : /*! ZSTD_readSkippableFrame() :
* Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer. * Retrieves content of a skippable frame, and writes it to dst buffer.
* *
* The parameter magicVariant will receive the magicVariant that was supplied when the frame was written, * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
* i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested
* in the magicVariant. * in the magicVariant.
* *
* Returns an error if destination buffer is not large enough, or if the frame is not skippable. * Returns an error if destination buffer is not large enough, or if this is not a valid skippable frame.
* *
* @return : number of bytes written or a ZSTD error. * @return : number of bytes written or a ZSTD error.
*/ */
ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant, size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity,
const void* src, size_t srcSize) unsigned* magicVariant, /* optional, can be NULL */
const void* src, size_t srcSize)
{ {
U32 const magicNumber = MEM_readLE32(src); RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");
size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
/* check input validity */ { U32 const magicNumber = MEM_readLE32(src);
RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, ""); size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, ""); size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
/* deliver payload */ /* check input validity */
if (skippableContentSize > 0 && dst != NULL) RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize); RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
if (magicVariant != NULL) RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
*magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
return skippableContentSize; /* deliver payload */
if (skippableContentSize > 0 && dst != NULL)
ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
if (magicVariant != NULL)
*magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
return skippableContentSize;
}
} }
/* ZSTD_findDecompressedSize() : /* ZSTD_findDecompressedSize() :
* compatible with legacy mode
* `srcSize` must be the exact length of some number of ZSTD compressed and/or * `srcSize` must be the exact length of some number of ZSTD compressed and/or
* skippable frames * skippable frames
* @return : decompressed size of the frames contained */ * note: compatible with legacy mode
* @return : decompressed size of the frames contained */
unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
{ {
unsigned long long totalDstSize = 0; unsigned long long totalDstSize = 0;
@ -592,9 +622,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
size_t const skippableSize = readSkippableFrameSize(src, srcSize); size_t const skippableSize = readSkippableFrameSize(src, srcSize);
if (ZSTD_isError(skippableSize)) { if (ZSTD_isError(skippableSize)) return ZSTD_CONTENTSIZE_ERROR;
return ZSTD_CONTENTSIZE_ERROR;
}
assert(skippableSize <= srcSize); assert(skippableSize <= srcSize);
src = (const BYTE *)src + skippableSize; src = (const BYTE *)src + skippableSize;
@ -602,17 +630,17 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
continue; continue;
} }
{ unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); { unsigned long long const fcs = ZSTD_getFrameContentSize(src, srcSize);
if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret; if (fcs >= ZSTD_CONTENTSIZE_ERROR) return fcs;
/* check for overflow */ if (totalDstSize + fcs < totalDstSize)
if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR; return ZSTD_CONTENTSIZE_ERROR; /* check for overflow */
totalDstSize += ret; totalDstSize += fcs;
} }
/* skip to next frame */
{ size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
if (ZSTD_isError(frameSrcSize)) { if (ZSTD_isError(frameSrcSize)) return ZSTD_CONTENTSIZE_ERROR;
return ZSTD_CONTENTSIZE_ERROR; assert(frameSrcSize <= srcSize);
}
src = (const BYTE *)src + frameSrcSize; src = (const BYTE *)src + frameSrcSize;
srcSize -= frameSrcSize; srcSize -= frameSrcSize;
@ -730,10 +758,11 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
ip += 4; ip += 4;
} }
frameSizeInfo.nbBlocks = nbBlocks;
frameSizeInfo.compressedSize = (size_t)(ip - ipstart); frameSizeInfo.compressedSize = (size_t)(ip - ipstart);
frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
? zfh.frameContentSize ? zfh.frameContentSize
: nbBlocks * zfh.blockSizeMax; : (unsigned long long)nbBlocks * zfh.blockSizeMax;
return frameSizeInfo; return frameSizeInfo;
} }
} }
@ -773,6 +802,48 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
return bound; return bound;
} }
size_t ZSTD_decompressionMargin(void const* src, size_t srcSize)
{
size_t margin = 0;
unsigned maxBlockSize = 0;
/* Iterate over each frame */
while (srcSize > 0) {
ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
size_t const compressedSize = frameSizeInfo.compressedSize;
unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
ZSTD_frameHeader zfh;
FORWARD_IF_ERROR(ZSTD_getFrameHeader(&zfh, src, srcSize), "");
if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
return ERROR(corruption_detected);
if (zfh.frameType == ZSTD_frame) {
/* Add the frame header to our margin */
margin += zfh.headerSize;
/* Add the checksum to our margin */
margin += zfh.checksumFlag ? 4 : 0;
/* Add 3 bytes per block */
margin += 3 * frameSizeInfo.nbBlocks;
/* Compute the max block size */
maxBlockSize = MAX(maxBlockSize, zfh.blockSizeMax);
} else {
assert(zfh.frameType == ZSTD_skippableFrame);
/* Add the entire skippable frame size to our margin. */
margin += compressedSize;
}
assert(srcSize >= compressedSize);
src = (const BYTE*)src + compressedSize;
srcSize -= compressedSize;
}
/* Add the max block size back to the margin. */
margin += maxBlockSize;
return margin;
}
/*-************************************************************* /*-*************************************************************
* Frame decoding * Frame decoding
@ -930,6 +1001,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
} }
ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0); ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0);
/* Allow caller to get size read */ /* Allow caller to get size read */
DEBUGLOG(4, "ZSTD_decompressFrame: decompressed frame of size %zi, consuming %zi bytes of input", op-ostart, ip - (const BYTE*)*srcPtr);
*srcPtr = ip; *srcPtr = ip;
*srcSizePtr = remainingSrcSize; *srcSizePtr = remainingSrcSize;
return (size_t)(op-ostart); return (size_t)(op-ostart);
@ -955,17 +1027,18 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
while (srcSize >= ZSTD_startingInputLength(dctx->format)) { while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
{ U32 const magicNumber = MEM_readLE32(src); if (srcSize >= 4) {
DEBUGLOG(4, "reading magic number %08X (expecting %08X)", U32 const magicNumber = MEM_readLE32(src);
(unsigned)magicNumber, ZSTD_MAGICNUMBER); DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber);
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
/* skippable frame detected : skip it */
size_t const skippableSize = readSkippableFrameSize(src, srcSize); size_t const skippableSize = readSkippableFrameSize(src, srcSize);
FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed"); FORWARD_IF_ERROR(skippableSize, "invalid skippable frame");
assert(skippableSize <= srcSize); assert(skippableSize <= srcSize);
src = (const BYTE *)src + skippableSize; src = (const BYTE *)src + skippableSize;
srcSize -= skippableSize; srcSize -= skippableSize;
continue; continue; /* check next frame */
} } } }
if (ddict) { if (ddict) {
@ -1061,8 +1134,8 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr
size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; }
/* /*
* Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed, * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed, we
* we allow taking a partial block as the input. Currently only raw uncompressed blocks can * allow taking a partial block as the input. Currently only raw uncompressed blocks can
* be streamed. * be streamed.
* *
* For blocks that can be streamed, this allows us to reduce the latency until we produce * For blocks that can be streamed, this allows us to reduce the latency until we produce
@ -1262,7 +1335,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
default: default:
assert(0); /* impossible */ assert(0); /* impossible */
RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ RETURN_ERROR(GENERIC, "impossible to reach"); /* some compilers require default to do something */
} }
} }
@ -1303,11 +1376,11 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
/* in minimal huffman, we always use X1 variants */ /* in minimal huffman, we always use X1 variants */
size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable, size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable,
dictPtr, dictEnd - dictPtr, dictPtr, dictEnd - dictPtr,
workspace, workspaceSize); workspace, workspaceSize, /* flags */ 0);
#else #else
size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
dictPtr, (size_t)(dictEnd - dictPtr), dictPtr, (size_t)(dictEnd - dictPtr),
workspace, workspaceSize); workspace, workspaceSize, /* flags */ 0);
#endif #endif
RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, "");
dictPtr += hSize; dictPtr += hSize;
@ -1403,7 +1476,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
dctx->prefixStart = NULL; dctx->prefixStart = NULL;
dctx->virtualStart = NULL; dctx->virtualStart = NULL;
dctx->dictEnd = NULL; dctx->dictEnd = NULL;
dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ dctx->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */
dctx->litEntropy = dctx->fseEntropy = 0; dctx->litEntropy = dctx->fseEntropy = 0;
dctx->dictID = 0; dctx->dictID = 0;
dctx->bType = bt_reserved; dctx->bType = bt_reserved;
@ -1465,7 +1538,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
* This could be for one of the following reasons : * This could be for one of the following reasons :
* - The frame does not require a dictionary (most common case). * - The frame does not require a dictionary (most common case).
* - The frame was built with dictID intentionally removed. * - The frame was built with dictID intentionally removed.
* Needed dictionary is a hidden information. * Needed dictionary is a hidden piece of information.
* Note : this use case also happens when using a non-conformant dictionary. * Note : this use case also happens when using a non-conformant dictionary.
* - `srcSize` is too small, and as a result, frame header could not be decoded. * - `srcSize` is too small, and as a result, frame header could not be decoded.
* Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`. * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`.
@ -1474,7 +1547,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
* ZSTD_getFrameHeader(), which will provide a more precise error code. */ * ZSTD_getFrameHeader(), which will provide a more precise error code. */
unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
{ {
ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 }; ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0, 0, 0 };
size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize); size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
if (ZSTD_isError(hError)) return 0; if (ZSTD_isError(hError)) return 0;
return zfp.dictID; return zfp.dictID;
@ -1581,7 +1654,9 @@ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t di
size_t ZSTD_initDStream(ZSTD_DStream* zds) size_t ZSTD_initDStream(ZSTD_DStream* zds)
{ {
DEBUGLOG(4, "ZSTD_initDStream"); DEBUGLOG(4, "ZSTD_initDStream");
return ZSTD_initDStream_usingDDict(zds, NULL); FORWARD_IF_ERROR(ZSTD_DCtx_reset(zds, ZSTD_reset_session_only), "");
FORWARD_IF_ERROR(ZSTD_DCtx_refDDict(zds, NULL), "");
return ZSTD_startingInputLength(zds->format);
} }
/* ZSTD_initDStream_usingDDict() : /* ZSTD_initDStream_usingDDict() :
@ -1589,6 +1664,7 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds)
* this function cannot fail */ * this function cannot fail */
size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
{ {
DEBUGLOG(4, "ZSTD_initDStream_usingDDict");
FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , ""); FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , "");
FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , ""); FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , "");
return ZSTD_startingInputLength(dctx->format); return ZSTD_startingInputLength(dctx->format);
@ -1599,6 +1675,7 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
* this function cannot fail */ * this function cannot fail */
size_t ZSTD_resetDStream(ZSTD_DStream* dctx) size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
{ {
DEBUGLOG(4, "ZSTD_resetDStream");
FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), "");
return ZSTD_startingInputLength(dctx->format); return ZSTD_startingInputLength(dctx->format);
} }
@ -1670,6 +1747,11 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict; bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict;
bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts; bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts;
return bounds; return bounds;
case ZSTD_d_disableHuffmanAssembly:
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
default:; default:;
} }
bounds.error = ERROR(parameter_unsupported); bounds.error = ERROR(parameter_unsupported);
@ -1710,6 +1792,9 @@ size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value
case ZSTD_d_refMultipleDDicts: case ZSTD_d_refMultipleDDicts:
*value = (int)dctx->refMultipleDDicts; *value = (int)dctx->refMultipleDDicts;
return 0; return 0;
case ZSTD_d_disableHuffmanAssembly:
*value = (int)dctx->disableHufAsm;
return 0;
default:; default:;
} }
RETURN_ERROR(parameter_unsupported, ""); RETURN_ERROR(parameter_unsupported, "");
@ -1743,6 +1828,10 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value
} }
dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value; dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value;
return 0; return 0;
case ZSTD_d_disableHuffmanAssembly:
CHECK_DBOUNDS(ZSTD_d_disableHuffmanAssembly, value);
dctx->disableHufAsm = value != 0;
return 0;
default:; default:;
} }
RETURN_ERROR(parameter_unsupported, ""); RETURN_ERROR(parameter_unsupported, "");
@ -1918,7 +2007,6 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
if (zds->refMultipleDDicts && zds->ddictSet) { if (zds->refMultipleDDicts && zds->ddictSet) {
ZSTD_DCtx_selectFrameDDict(zds); ZSTD_DCtx_selectFrameDDict(zds);
} }
DEBUGLOG(5, "header size : %u", (U32)hSize);
if (ZSTD_isError(hSize)) { if (ZSTD_isError(hSize)) {
return hSize; /* error */ return hSize; /* error */
} }
@ -1932,6 +2020,11 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
zds->lhSize += remainingInput; zds->lhSize += remainingInput;
} }
input->pos = input->size; input->pos = input->size;
/* check first few bytes */
FORWARD_IF_ERROR(
ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format),
"First few bytes detected incorrect" );
/* return hint input size */
return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
} }
assert(ip != NULL); assert(ip != NULL);
@ -1949,8 +2042,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds));
if (ZSTD_isError(decompressedSize)) return decompressedSize; if (ZSTD_isError(decompressedSize)) return decompressedSize;
DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
assert(istart != NULL);
ip = istart + cSize; ip = istart + cSize;
op += decompressedSize; op = op ? op + decompressedSize : op; /* can occur if frameContentSize = 0 (empty frame) */
zds->expected = 0; zds->expected = 0;
zds->streamStage = zdss_init; zds->streamStage = zdss_init;
someMoreWork = 0; someMoreWork = 0;
@ -2034,6 +2128,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
} }
if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */
FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), "");
assert(ip != NULL);
ip += neededInSize; ip += neededInSize;
/* Function modifies the stage so we must break */ /* Function modifies the stage so we must break */
break; break;
@ -2048,7 +2143,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
int const isSkipFrame = ZSTD_isSkipFrame(zds); int const isSkipFrame = ZSTD_isSkipFrame(zds);
size_t loadedSize; size_t loadedSize;
/* At this point we shouldn't be decompressing a block that we can stream. */ /* At this point we shouldn't be decompressing a block that we can stream. */
assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip)); assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip)));
if (isSkipFrame) { if (isSkipFrame) {
loadedSize = MIN(toLoad, (size_t)(iend-ip)); loadedSize = MIN(toLoad, (size_t)(iend-ip));
} else { } else {
@ -2057,8 +2152,11 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
"should never happen"); "should never happen");
loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip)); loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip));
} }
ip += loadedSize; if (loadedSize != 0) {
zds->inPos += loadedSize; /* ip may be NULL */
ip += loadedSize;
zds->inPos += loadedSize;
}
if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */
/* decode loaded input */ /* decode loaded input */
@ -2068,14 +2166,17 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
break; break;
} }
case zdss_flush: case zdss_flush:
{ size_t const toFlushSize = zds->outEnd - zds->outStart; {
size_t const toFlushSize = zds->outEnd - zds->outStart;
size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize); size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize);
op += flushedSize;
op = op ? op + flushedSize : op;
zds->outStart += flushedSize; zds->outStart += flushedSize;
if (flushedSize == toFlushSize) { /* flush completed */ if (flushedSize == toFlushSize) { /* flush completed */
zds->streamStage = zdss_read; zds->streamStage = zdss_read;
if ( (zds->outBuffSize < zds->fParams.frameContentSize) if ( (zds->outBuffSize < zds->fParams.frameContentSize)
&& (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) {
DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)", DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)",
(int)(zds->outBuffSize - zds->outStart), (int)(zds->outBuffSize - zds->outStart),
(U32)zds->fParams.blockSizeMax); (U32)zds->fParams.blockSizeMax);
@ -2089,7 +2190,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
default: default:
assert(0); /* impossible */ assert(0); /* impossible */
RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ RETURN_ERROR(GENERIC, "impossible to reach"); /* some compilers require default to do something */
} } } }
/* result */ /* result */
@ -2102,8 +2203,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
if ((ip==istart) && (op==ostart)) { /* no forward progress */ if ((ip==istart) && (op==ostart)) { /* no forward progress */
zds->noForwardProgress ++; zds->noForwardProgress ++;
if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
RETURN_ERROR_IF(op==oend, dstSize_tooSmall, ""); RETURN_ERROR_IF(op==oend, noForwardProgress_destFull, "");
RETURN_ERROR_IF(ip==iend, srcSize_wrong, ""); RETURN_ERROR_IF(ip==iend, noForwardProgress_inputEmpty, "");
assert(0); assert(0);
} }
} else { } else {
@ -2140,11 +2241,17 @@ size_t ZSTD_decompressStream_simpleArgs (
void* dst, size_t dstCapacity, size_t* dstPos, void* dst, size_t dstCapacity, size_t* dstPos,
const void* src, size_t srcSize, size_t* srcPos) const void* src, size_t srcSize, size_t* srcPos)
{ {
ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; ZSTD_outBuffer output;
ZSTD_inBuffer input = { src, srcSize, *srcPos }; ZSTD_inBuffer input;
/* ZSTD_compress_generic() will check validity of dstPos and srcPos */ output.dst = dst;
size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); output.size = dstCapacity;
*dstPos = output.pos; output.pos = *dstPos;
*srcPos = input.pos; input.src = src;
return cErr; input.size = srcSize;
input.pos = *srcPos;
{ size_t const cErr = ZSTD_decompressStream(dctx, &output, &input);
*dstPos = output.pos;
*srcPos = input.pos;
return cErr;
}
} }

View File

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -20,12 +21,12 @@
#include "../common/mem.h" /* low level memory routines */ #include "../common/mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY #define FSE_STATIC_LINKING_ONLY
#include "../common/fse.h" #include "../common/fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h" #include "../common/huf.h"
#include "../common/zstd_internal.h" #include "../common/zstd_internal.h"
#include "zstd_decompress_internal.h" /* ZSTD_DCtx */ #include "zstd_decompress_internal.h" /* ZSTD_DCtx */
#include "zstd_ddict.h" /* ZSTD_DDictDictContent */ #include "zstd_ddict.h" /* ZSTD_DDictDictContent */
#include "zstd_decompress_block.h" #include "zstd_decompress_block.h"
#include "../common/bits.h" /* ZSTD_highbit32 */
/*_******************************************************* /*_*******************************************************
* Macros * Macros
@ -89,7 +90,7 @@ static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const
dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE; dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
} }
else { else {
/* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */ /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */
dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize; dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize; dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
} }
@ -134,13 +135,16 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
ZSTD_FALLTHROUGH; ZSTD_FALLTHROUGH;
case set_compressed: case set_compressed:
RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3");
{ size_t lhSize, litSize, litCSize; { size_t lhSize, litSize, litCSize;
U32 singleStream=0; U32 singleStream=0;
U32 const lhlCode = (istart[0] >> 2) & 3; U32 const lhlCode = (istart[0] >> 2) & 3;
U32 const lhc = MEM_readLE32(istart); U32 const lhc = MEM_readLE32(istart);
size_t hufSuccess; size_t hufSuccess;
size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity); size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
int const flags = 0
| (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
| (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
switch(lhlCode) switch(lhlCode)
{ {
case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
@ -165,6 +169,10 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
} }
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
if (!singleStream)
RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
"Not enough literals (%zu) for the 4-streams mode (min %u)",
litSize, MIN_LITERALS_FOR_4_STREAMS);
RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, ""); RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0); ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
@ -176,13 +184,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
if (litEncType==set_repeat) { if (litEncType==set_repeat) {
if (singleStream) { if (singleStream) {
hufSuccess = HUF_decompress1X_usingDTable_bmi2( hufSuccess = HUF_decompress1X_usingDTable(
dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->litBuffer, litSize, istart+lhSize, litCSize,
dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx)); dctx->HUFptr, flags);
} else { } else {
hufSuccess = HUF_decompress4X_usingDTable_bmi2( assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
hufSuccess = HUF_decompress4X_usingDTable(
dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->litBuffer, litSize, istart+lhSize, litCSize,
dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx)); dctx->HUFptr, flags);
} }
} else { } else {
if (singleStream) { if (singleStream) {
@ -190,18 +199,18 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
hufSuccess = HUF_decompress1X_DCtx_wksp( hufSuccess = HUF_decompress1X_DCtx_wksp(
dctx->entropy.hufTable, dctx->litBuffer, litSize, dctx->entropy.hufTable, dctx->litBuffer, litSize,
istart+lhSize, litCSize, dctx->workspace, istart+lhSize, litCSize, dctx->workspace,
sizeof(dctx->workspace)); sizeof(dctx->workspace), flags);
#else #else
hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( hufSuccess = HUF_decompress1X1_DCtx_wksp(
dctx->entropy.hufTable, dctx->litBuffer, litSize, dctx->entropy.hufTable, dctx->litBuffer, litSize,
istart+lhSize, litCSize, dctx->workspace, istart+lhSize, litCSize, dctx->workspace,
sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); sizeof(dctx->workspace), flags);
#endif #endif
} else { } else {
hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( hufSuccess = HUF_decompress4X_hufOnly_wksp(
dctx->entropy.hufTable, dctx->litBuffer, litSize, dctx->entropy.hufTable, dctx->litBuffer, litSize,
istart+lhSize, litCSize, dctx->workspace, istart+lhSize, litCSize, dctx->workspace,
sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx)); sizeof(dctx->workspace), flags);
} }
} }
if (dctx->litBufferLocation == ZSTD_split) if (dctx->litBufferLocation == ZSTD_split)
@ -237,6 +246,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
break; break;
case 3: case 3:
lhSize = 3; lhSize = 3;
RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3");
litSize = MEM_readLE24(istart) >> 4; litSize = MEM_readLE24(istart) >> 4;
break; break;
} }
@ -279,12 +289,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
break; break;
case 1: case 1:
lhSize = 2; lhSize = 2;
RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3");
litSize = MEM_readLE16(istart) >> 4; litSize = MEM_readLE16(istart) >> 4;
break; break;
case 3: case 3:
lhSize = 3; lhSize = 3;
RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4");
litSize = MEM_readLE24(istart) >> 4; litSize = MEM_readLE24(istart) >> 4;
RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
break; break;
} }
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
@ -506,14 +517,15 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
for (i = 8; i < n; i += 8) { for (i = 8; i < n; i += 8) {
MEM_write64(spread + pos + i, sv); MEM_write64(spread + pos + i, sv);
} }
pos += n; assert(n>=0);
pos += (size_t)n;
} }
} }
/* Now we spread those positions across the table. /* Now we spread those positions across the table.
* The benefit of doing it in two stages is that we avoid the the * The benefit of doing it in two stages is that we avoid the
* variable size inner loop, which caused lots of branch misses. * variable size inner loop, which caused lots of branch misses.
* Now we can run through all the positions without any branch misses. * Now we can run through all the positions without any branch misses.
* We unroll the loop twice, since that is what emperically worked best. * We unroll the loop twice, since that is what empirically worked best.
*/ */
{ {
size_t position = 0; size_t position = 0;
@ -540,7 +552,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
for (i=0; i<n; i++) { for (i=0; i<n; i++) {
tableDecode[position].baseValue = s; tableDecode[position].baseValue = s;
position = (position + step) & tableMask; position = (position + step) & tableMask;
while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */ while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask; /* lowprob area */
} } } }
assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
} }
@ -551,7 +563,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
for (u=0; u<tableSize; u++) { for (u=0; u<tableSize; u++) {
U32 const symbol = tableDecode[u].baseValue; U32 const symbol = tableDecode[u].baseValue;
U32 const nextState = symbolNext[symbol]++; U32 const nextState = symbolNext[symbol]++;
tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) ); tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize); tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
assert(nbAdditionalBits[symbol] < 255); assert(nbAdditionalBits[symbol] < 255);
tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol]; tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
@ -964,6 +976,11 @@ size_t ZSTD_execSequence(BYTE* op,
assert(op != NULL /* Precondition */); assert(op != NULL /* Precondition */);
assert(oend_w < oend /* No underflow */); assert(oend_w < oend /* No underflow */);
#if defined(__aarch64__)
/* prefetch sequence starting from match that will be used for copy later */
PREFETCH_L1(match);
#endif
/* Handle edge cases in a slow path: /* Handle edge cases in a slow path:
* - Read beyond end of literals * - Read beyond end of literals
* - Match end is within WILDCOPY_OVERLIMIT of oend * - Match end is within WILDCOPY_OVERLIMIT of oend
@ -1154,7 +1171,7 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16
} }
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
* offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) * offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32
* bits before reloading. This value is the maximum number of bytes we read * bits before reloading. This value is the maximum number of bytes we read
* after reloading when we are decoding long offsets. * after reloading when we are decoding long offsets.
*/ */
@ -1169,9 +1186,27 @@ FORCE_INLINE_TEMPLATE seq_t
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
{ {
seq_t seq; seq_t seq;
/*
* ZSTD_seqSymbol is a structure with a total of 64 bits wide. So it can be
* loaded in one operation and extracted its fields by simply shifting or
* bit-extracting on aarch64.
* GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh
* operations that cause performance drop. This can be avoided by using this
* ZSTD_memcpy hack.
*/
#if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__))
ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS;
ZSTD_seqSymbol* const llDInfo = &llDInfoS;
ZSTD_seqSymbol* const mlDInfo = &mlDInfoS;
ZSTD_seqSymbol* const ofDInfo = &ofDInfoS;
ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol));
ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol));
ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol));
#else
const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state; const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state; const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state; const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
#endif
seq.matchLength = mlDInfo->baseValue; seq.matchLength = mlDInfo->baseValue;
seq.litLength = llDInfo->baseValue; seq.litLength = llDInfo->baseValue;
{ U32 const ofBase = ofDInfo->baseValue; { U32 const ofBase = ofDInfo->baseValue;
@ -1186,28 +1221,31 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
U32 const llnbBits = llDInfo->nbBits; U32 const llnbBits = llDInfo->nbBits;
U32 const mlnbBits = mlDInfo->nbBits; U32 const mlnbBits = mlDInfo->nbBits;
U32 const ofnbBits = ofDInfo->nbBits; U32 const ofnbBits = ofDInfo->nbBits;
assert(llBits <= MaxLLBits);
assert(mlBits <= MaxMLBits);
assert(ofBits <= MaxOff);
/* /*
* As gcc has better branch and block analyzers, sometimes it is only * As gcc has better branch and block analyzers, sometimes it is only
* valuable to mark likelyness for clang, it gives around 3-4% of * valuable to mark likeliness for clang, it gives around 3-4% of
* performance. * performance.
*/ */
/* sequence */ /* sequence */
{ size_t offset; { size_t offset;
#if defined(__clang__)
if (LIKELY(ofBits > 1)) {
#else
if (ofBits > 1) { if (ofBits > 1) {
#endif
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
assert(ofBits <= MaxOff); ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);
if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); /* Always read extra bits, this keeps the logic simple,
* avoids branches, and avoids accidentally reading 0 bits.
*/
U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;
offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
BIT_reloadDStream(&seqState->DStream); BIT_reloadDStream(&seqState->DStream);
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); offset += BIT_readBitsFast(&seqState->DStream, extraBits);
assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
} else { } else {
offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
@ -1232,11 +1270,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
seq.offset = offset; seq.offset = offset;
} }
#if defined(__clang__)
if (UNLIKELY(mlBits > 0))
#else
if (mlBits > 0) if (mlBits > 0)
#endif
seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
@ -1246,11 +1280,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
/* Ensure there are enough bits to read the rest of data in 64-bit mode. */ /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
#if defined(__clang__)
if (UNLIKELY(llBits > 0))
#else
if (llBits > 0) if (llBits > 0)
#endif
seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
if (MEM_32bits()) if (MEM_32bits())
@ -1552,7 +1582,7 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart); const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
const BYTE* const vBase = (const BYTE*)(dctx->virtualStart); const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd); const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
DEBUGLOG(5, "ZSTD_decompressSequences_body"); DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
(void)frame; (void)frame;
/* Regen sequences */ /* Regen sequences */
@ -1945,34 +1975,79 @@ ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
/*
* @returns The total size of the history referenceable by zstd, including
* both the prefix and the extDict. At @p op any offset larger than this
* is invalid.
*/
static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart)
{
return (size_t)(op - virtualStart);
}
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ typedef struct {
!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) unsigned longOffsetShare;
/* ZSTD_getLongOffsetsShare() : unsigned maxNbAdditionalBits;
} ZSTD_OffsetInfo;
/* ZSTD_getOffsetInfo() :
* condition : offTable must be valid * condition : offTable must be valid
* @return : "share" of long offsets (arbitrarily defined as > (1<<23)) * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
* compared to maximum possible of (1<<OffFSELog) */ * compared to maximum possible of (1<<OffFSELog),
static unsigned * as well as the maximum number additional bits required.
ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable) */
static ZSTD_OffsetInfo
ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq)
{ {
const void* ptr = offTable; ZSTD_OffsetInfo info = {0, 0};
U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog; /* If nbSeq == 0, then the offTable is uninitialized, but we have
const ZSTD_seqSymbol* table = offTable + 1; * no sequences, so both values should be 0.
U32 const max = 1 << tableLog; */
U32 u, total = 0; if (nbSeq != 0) {
DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog); const void* ptr = offTable;
U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
const ZSTD_seqSymbol* table = offTable + 1;
U32 const max = 1 << tableLog;
U32 u;
DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
assert(max <= (1 << OffFSELog)); /* max not too large */ assert(max <= (1 << OffFSELog)); /* max not too large */
for (u=0; u<max; u++) { for (u=0; u<max; u++) {
if (table[u].nbAdditionalBits > 22) total += 1; info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits);
if (table[u].nbAdditionalBits > 22) info.longOffsetShare += 1;
}
assert(tableLog <= OffFSELog);
info.longOffsetShare <<= (OffFSELog - tableLog); /* scale to OffFSELog */
} }
assert(tableLog <= OffFSELog); return info;
total <<= (OffFSELog - tableLog); /* scale to OffFSELog */ }
return total; /*
* @returns The maximum offset we can decode in one read of our bitstream, without
* reloading more bits in the middle of the offset bits read. Any offsets larger
* than this must use the long offset decoder.
*/
static size_t ZSTD_maxShortOffset(void)
{
if (MEM_64bits()) {
/* We can decode any offset without reloading bits.
* This might change if the max window size grows.
*/
ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
return (size_t)-1;
} else {
/* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1.
* This offBase would require STREAM_ACCUMULATOR_MIN extra bits.
* Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset.
*/
size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1;
size_t const maxOffset = maxOffbase - ZSTD_REP_NUM;
assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN);
return maxOffset;
}
} }
#endif
size_t size_t
ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
@ -1980,20 +2055,21 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
const void* src, size_t srcSize, const int frame, const streaming_operation streaming) const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
{ /* blockType == blockCompressed */ { /* blockType == blockCompressed */
const BYTE* ip = (const BYTE*)src; const BYTE* ip = (const BYTE*)src;
/* isLongOffset must be true if there are long offsets.
* Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
* We don't expect that to be the case in 64-bit mode.
* In block mode, window size is not known, so we have to be conservative.
* (note: but it could be evaluated from current-lowLimit)
*/
ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); /* Note : the wording of the specification
* allows compressed block to be sized exactly ZSTD_BLOCKSIZE_MAX.
* This generally does not happen, as it makes little sense,
* since an uncompressed block would feature same size and have no decompression cost.
* Also, note that decoder from reference libzstd before < v1.5.4
* would consider this edge case as an error.
* As a consequence, avoid generating compressed blocks of size ZSTD_BLOCKSIZE_MAX
* for broader compatibility with the deployed ecosystem of zstd decoders */
RETURN_ERROR_IF(srcSize > ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
/* Decode literals section */ /* Decode literals section */
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming); { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize); DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize);
if (ZSTD_isError(litCSize)) return litCSize; if (ZSTD_isError(litCSize)) return litCSize;
ip += litCSize; ip += litCSize;
srcSize -= litCSize; srcSize -= litCSize;
@ -2001,6 +2077,23 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
/* Build Decoding Tables */ /* Build Decoding Tables */
{ {
/* Compute the maximum block size, which must also work when !frame and fParams are unset.
* Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
*/
size_t const blockSizeMax = MIN(dstCapacity, (frame ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX));
size_t const totalHistorySize = ZSTD_totalHistorySize((BYTE*)dst + blockSizeMax, (BYTE const*)dctx->virtualStart);
/* isLongOffset must be true if there are long offsets.
* Offsets are long if they are larger than ZSTD_maxShortOffset().
* We don't expect that to be the case in 64-bit mode.
*
* We check here to see if our history is large enough to allow long offsets.
* If it isn't, then we can't possible have (valid) long offsets. If the offset
* is invalid, then it is okay to read it incorrectly.
*
* If isLongOffsets is true, then we will later check our decoding table to see
* if it is even possible to generate long offsets.
*/
ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset()));
/* These macros control at build-time which decompressor implementation /* These macros control at build-time which decompressor implementation
* we use. If neither is defined, we do some inspection and dispatch at * we use. If neither is defined, we do some inspection and dispatch at
* runtime. * runtime.
@ -2008,6 +2101,11 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
int usePrefetchDecoder = dctx->ddictIsCold; int usePrefetchDecoder = dctx->ddictIsCold;
#else
/* Set to 1 to avoid computing offset info if we don't need to.
* Otherwise this value is ignored.
*/
int usePrefetchDecoder = 1;
#endif #endif
int nbSeq; int nbSeq;
size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
@ -2015,28 +2113,42 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
ip += seqHSize; ip += seqHSize;
srcSize -= seqHSize; srcSize -= seqHSize;
RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall,
"invalid dst");
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ /* If we could potentially have long offsets, or we might want to use the prefetch decoder,
!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) * compute information about the share of long offsets, and the maximum nbAdditionalBits.
if ( !usePrefetchDecoder * NOTE: could probably use a larger nbSeq limit
&& (!frame || (dctx->fParams.windowSize > (1<<24))) */
&& (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */ if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) {
U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr); ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq);
U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) {
usePrefetchDecoder = (shareLongOffsets >= minShare); /* If isLongOffset, but the maximum number of additional bits that we see in our table is small
* enough, then we know it is impossible to have too long an offset in this block, so we can
* use the regular offset decoder.
*/
isLongOffset = ZSTD_lo_isRegularOffset;
}
if (!usePrefetchDecoder) {
U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
usePrefetchDecoder = (info.longOffsetShare >= minShare);
}
} }
#endif
dctx->ddictIsCold = 0; dctx->ddictIsCold = 0;
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
if (usePrefetchDecoder) if (usePrefetchDecoder) {
#else
(void)usePrefetchDecoder;
{
#endif #endif
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
#endif #endif
}
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
/* else */ /* else */
@ -2060,9 +2172,9 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
} }
size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity, void* dst, size_t dstCapacity,
const void* src, size_t srcSize) const void* src, size_t srcSize)
{ {
size_t dSize; size_t dSize;
ZSTD_checkContinuity(dctx, dst, dstCapacity); ZSTD_checkContinuity(dctx, dst, dstCapacity);
@ -2070,3 +2182,12 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
dctx->previousDstEnd = (char*)dst + dSize; dctx->previousDstEnd = (char*)dst + dSize;
return dSize; return dSize;
} }
/* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */
size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize)
{
return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize);
}

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -64,5 +65,10 @@ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
unsigned tableLog, void* wksp, size_t wkspSize, unsigned tableLog, void* wksp, size_t wkspSize,
int bmi2); int bmi2);
/* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */
size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
#endif /* ZSTD_DEC_BLOCK_H */ #endif /* ZSTD_DEC_BLOCK_H */

View File

@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Yann Collet, Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -75,12 +76,13 @@ static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32)) #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
#define ZSTD_HUFFDTABLE_CAPACITY_LOG 12
typedef struct { typedef struct {
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */
U32 rep[ZSTD_REP_NUM]; U32 rep[ZSTD_REP_NUM];
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
} ZSTD_entropyDTables_t; } ZSTD_entropyDTables_t;
@ -164,6 +166,7 @@ struct ZSTD_DCtx_s
ZSTD_dictUses_e dictUses; ZSTD_dictUses_e dictUses;
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */ ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
int disableHufAsm;
/* streaming */ /* streaming */
ZSTD_dStreamStage streamStage; ZSTD_dStreamStage streamStage;

View File

@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* /*
* Copyright (c) Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the

View File

@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -24,9 +24,6 @@ EXPORT_SYMBOL_GPL(HUF_readStats_wksp);
EXPORT_SYMBOL_GPL(ZSTD_isError); EXPORT_SYMBOL_GPL(ZSTD_isError);
EXPORT_SYMBOL_GPL(ZSTD_getErrorName); EXPORT_SYMBOL_GPL(ZSTD_getErrorName);
EXPORT_SYMBOL_GPL(ZSTD_getErrorCode); EXPORT_SYMBOL_GPL(ZSTD_getErrorCode);
EXPORT_SYMBOL_GPL(ZSTD_customMalloc);
EXPORT_SYMBOL_GPL(ZSTD_customCalloc);
EXPORT_SYMBOL_GPL(ZSTD_customFree);
MODULE_LICENSE("Dual BSD/GPL"); MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("Zstd Common"); MODULE_DESCRIPTION("Zstd Common");

View File

@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the

View File

@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause // SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/* /*
* Copyright (c) Facebook, Inc. * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved. * All rights reserved.
* *
* This source code is licensed under both the BSD-style license (found in the * This source code is licensed under both the BSD-style license (found in the
@ -77,7 +77,7 @@ EXPORT_SYMBOL(zstd_init_dstream);
size_t zstd_reset_dstream(zstd_dstream *dstream) size_t zstd_reset_dstream(zstd_dstream *dstream)
{ {
return ZSTD_resetDStream(dstream); return ZSTD_DCtx_reset(dstream, ZSTD_reset_session_only);
} }
EXPORT_SYMBOL(zstd_reset_dstream); EXPORT_SYMBOL(zstd_reset_dstream);