Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git
commit ad9dd1674d
@@ -337,6 +337,7 @@ Currently, the following pairs of encryption modes are supported:
- AES-256-XTS for contents and AES-256-CTS-CBC for filenames
- AES-128-CBC for contents and AES-128-CTS-CBC for filenames
- Adiantum for both contents and filenames
- AES-256-XTS for contents and AES-256-HCTR2 for filenames (v2 policies only)

If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair.

@@ -357,6 +358,17 @@ To use Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled. Also, fast
implementations of ChaCha and NHPoly1305 should be enabled, e.g.
CONFIG_CRYPTO_CHACHA20_NEON and CONFIG_CRYPTO_NHPOLY1305_NEON for ARM.

AES-256-HCTR2 is another true wide-block encryption mode that is intended for
use on CPUs with dedicated crypto instructions. AES-256-HCTR2 has the property
that a bitflip in the plaintext changes the entire ciphertext. This property
makes it desirable for filename encryption since initialization vectors are
reused within a directory. For more details on AES-256-HCTR2, see the paper
"Length-preserving encryption with HCTR2"
(https://eprint.iacr.org/2021/1441.pdf). To use AES-256-HCTR2,
CONFIG_CRYPTO_HCTR2 must be enabled. Also, fast implementations of XCTR and
POLYVAL should be enabled, e.g. CRYPTO_POLYVAL_ARM64_CE and
CRYPTO_AES_ARM64_CE_BLK for ARM64.

New encryption modes can be added relatively easily, without changes
to individual filesystems. However, authenticated encryption (AE)
modes are not currently supported because of the difficulty of dealing

@@ -404,11 +416,11 @@ alternatively has the file's nonce (for `DIRECT_KEY policies`_) or
inode number (for `IV_INO_LBLK_64 policies`_) included in the IVs.
Thus, IV reuse is limited to within a single directory.

With CTS-CBC, the IV reuse means that when the plaintext filenames
share a common prefix at least as long as the cipher block size (16
bytes for AES), the corresponding encrypted filenames will also share
a common prefix. This is undesirable. Adiantum does not have this
weakness, as it is a wide-block encryption mode.
With CTS-CBC, the IV reuse means that when the plaintext filenames share a
common prefix at least as long as the cipher block size (16 bytes for AES), the
corresponding encrypted filenames will also share a common prefix. This is
undesirable. Adiantum and HCTR2 do not have this weakness, as they are
wide-block encryption modes.

All supported filenames encryption modes accept any plaintext length
>= 16 bytes; cipher block alignment is not required. However,
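As a rough illustration of how the new mode pair is selected from userspace (this is not part of the commit), the sketch below applies a v2 policy with AES-256-XTS contents and AES-256-HCTR2 filenames through the FS_IOC_SET_ENCRYPTION_POLICY ioctl. The FSCRYPT_MODE_AES_256_HCTR2 constant name and the padding flag are assumptions, and the master key is assumed to have been added already with FS_IOC_ADD_ENCRYPTION_KEY.

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/fscrypt.h>

/*
 * Sketch: set a v2 fscrypt policy using AES-256-XTS for contents and
 * AES-256-HCTR2 for filenames on the directory open at dirfd. Assumes the
 * master key identified by key_id was already added with
 * FS_IOC_ADD_ENCRYPTION_KEY, and that FSCRYPT_MODE_AES_256_HCTR2 is the
 * constant introduced for the new filenames mode.
 */
static int set_xts_hctr2_policy(int dirfd,
				const __u8 key_id[FSCRYPT_KEY_IDENTIFIER_SIZE])
{
	struct fscrypt_policy_v2 policy = {
		.version = FSCRYPT_POLICY_V2,
		.contents_encryption_mode = FSCRYPT_MODE_AES_256_XTS,
		.filenames_encryption_mode = FSCRYPT_MODE_AES_256_HCTR2,
		.flags = FSCRYPT_POLICY_FLAGS_PAD_32,
	};

	memcpy(policy.master_key_identifier, key_id,
	       FSCRYPT_KEY_IDENTIFIER_SIZE);

	if (ioctl(dirfd, FS_IOC_SET_ENCRYPTION_POLICY, &policy) != 0) {
		perror("FS_IOC_SET_ENCRYPTION_POLICY");
		return -1;
	}
	return 0;
}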
MAINTAINERS (17 changed lines)

@@ -8978,15 +8978,24 @@ F: Documentation/admin-guide/perf/hisi-pcie-pmu.rst
F:	Documentation/admin-guide/perf/hisi-pmu.rst
F:	drivers/perf/hisilicon

HISILICON QM AND ZIP Controller DRIVER
HISILICON QM DRIVER
M:	Weili Qian <qianweili@huawei.com>
M:	Zhou Wang <wangzhou1@hisilicon.com>
L:	linux-crypto@vger.kernel.org
S:	Maintained
F:	drivers/crypto/hisilicon/Kconfig
F:	drivers/crypto/hisilicon/Makefile
F:	drivers/crypto/hisilicon/qm.c
F:	drivers/crypto/hisilicon/sgl.c
F:	include/linux/hisi_acc_qm.h

HISILICON ZIP Controller DRIVER
M:	Yang Shen <shenyang39@huawei.com>
M:	Zhou Wang <wangzhou1@hisilicon.com>
L:	linux-crypto@vger.kernel.org
S:	Maintained
F:	Documentation/ABI/testing/debugfs-hisi-zip
F:	drivers/crypto/hisilicon/qm.c
F:	drivers/crypto/hisilicon/sgl.c
F:	drivers/crypto/hisilicon/zip/
F:	include/linux/hisi_acc_qm.h

HISILICON ROCE DRIVER
M:	Wenpeng Liang <liangwenpeng@huawei.com>

@@ -63,7 +63,7 @@ config CRYPTO_SHA512_ARM
	  using optimized ARM assembler and NEON, when available.

config CRYPTO_BLAKE2S_ARM
	tristate "BLAKE2s digest algorithm (ARM)"
	bool "BLAKE2s digest algorithm (ARM)"
	select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
	help
	  BLAKE2s digest algorithm optimized with ARM scalar instructions. This
@@ -9,8 +9,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += blake2s-arm.o
obj-$(if $(CONFIG_CRYPTO_BLAKE2S_ARM),y) += libblake2s-arm.o
obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += libblake2s-arm.o
obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o

@@ -32,7 +31,6 @@ sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o
sha512-arm-y := sha512-core.o sha512-glue.o $(sha512-arm-neon-y)
blake2s-arm-y := blake2s-shash.o
libblake2s-arm-y:= blake2s-core.o blake2s-glue.o
blake2b-neon-y := blake2b-neon-core.o blake2b-neon-glue.o
sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o
@@ -1,75 +0,0 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * BLAKE2s digest algorithm, ARM scalar implementation
 *
 * Copyright 2020 Google LLC
 */

#include <crypto/internal/blake2s.h>
#include <crypto/internal/hash.h>

#include <linux/module.h>

static int crypto_blake2s_update_arm(struct shash_desc *desc,
				     const u8 *in, unsigned int inlen)
{
	return crypto_blake2s_update(desc, in, inlen, false);
}

static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out)
{
	return crypto_blake2s_final(desc, out, false);
}

#define BLAKE2S_ALG(name, driver_name, digest_size)			\
	{								\
		.base.cra_name		= name,				\
		.base.cra_driver_name	= driver_name,			\
		.base.cra_priority	= 200,				\
		.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,	\
		.base.cra_blocksize	= BLAKE2S_BLOCK_SIZE,		\
		.base.cra_ctxsize	= sizeof(struct blake2s_tfm_ctx), \
		.base.cra_module	= THIS_MODULE,			\
		.digestsize		= digest_size,			\
		.setkey			= crypto_blake2s_setkey,	\
		.init			= crypto_blake2s_init,		\
		.update			= crypto_blake2s_update_arm,	\
		.final			= crypto_blake2s_final_arm,	\
		.descsize		= sizeof(struct blake2s_state),	\
	}

static struct shash_alg blake2s_arm_algs[] = {
	BLAKE2S_ALG("blake2s-128", "blake2s-128-arm", BLAKE2S_128_HASH_SIZE),
	BLAKE2S_ALG("blake2s-160", "blake2s-160-arm", BLAKE2S_160_HASH_SIZE),
	BLAKE2S_ALG("blake2s-224", "blake2s-224-arm", BLAKE2S_224_HASH_SIZE),
	BLAKE2S_ALG("blake2s-256", "blake2s-256-arm", BLAKE2S_256_HASH_SIZE),
};

static int __init blake2s_arm_mod_init(void)
{
	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
		crypto_register_shashes(blake2s_arm_algs,
					ARRAY_SIZE(blake2s_arm_algs)) : 0;
}

static void __exit blake2s_arm_mod_exit(void)
{
	if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
		crypto_unregister_shashes(blake2s_arm_algs,
					  ARRAY_SIZE(blake2s_arm_algs));
}

module_init(blake2s_arm_mod_init);
module_exit(blake2s_arm_mod_exit);

MODULE_DESCRIPTION("BLAKE2s digest algorithm, ARM scalar implementation");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
MODULE_ALIAS_CRYPTO("blake2s-128");
MODULE_ALIAS_CRYPTO("blake2s-128-arm");
MODULE_ALIAS_CRYPTO("blake2s-160");
MODULE_ALIAS_CRYPTO("blake2s-160-arm");
MODULE_ALIAS_CRYPTO("blake2s-224");
MODULE_ALIAS_CRYPTO("blake2s-224-arm");
MODULE_ALIAS_CRYPTO("blake2s-256");
MODULE_ALIAS_CRYPTO("blake2s-256-arm");
@@ -72,6 +72,11 @@ config CRYPTO_GHASH_ARM64_CE
	select CRYPTO_GF128MUL
	select CRYPTO_LIB_AES

config CRYPTO_POLYVAL_ARM64_CE
	tristate "POLYVAL using ARMv8 Crypto Extensions (for HCTR2)"
	depends on KERNEL_MODE_NEON
	select CRYPTO_POLYVAL

config CRYPTO_CRCT10DIF_ARM64_CE
	tristate "CRCT10DIF digest algorithm using PMULL instructions"
	depends on KERNEL_MODE_NEON && CRC_T10DIF

@@ -96,13 +101,13 @@ config CRYPTO_AES_ARM64_CE_CCM
	select CRYPTO_LIB_AES

config CRYPTO_AES_ARM64_CE_BLK
	tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
	tristate "AES in ECB/CBC/CTR/XTS/XCTR modes using ARMv8 Crypto Extensions"
	depends on KERNEL_MODE_NEON
	select CRYPTO_SKCIPHER
	select CRYPTO_AES_ARM64_CE

config CRYPTO_AES_ARM64_NEON_BLK
	tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
	tristate "AES in ECB/CBC/CTR/XTS/XCTR modes using NEON instructions"
	depends on KERNEL_MODE_NEON
	select CRYPTO_SKCIPHER
	select CRYPTO_LIB_AES
@@ -32,6 +32,9 @@ sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o
obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o

obj-$(CONFIG_CRYPTO_POLYVAL_ARM64_CE) += polyval-ce.o
polyval-ce-y := polyval-ce-glue.o polyval-ce-core.o

obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM64_CE) += crct10dif-ce.o
crct10dif-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o

@ -34,10 +34,11 @@
|
||||
#define aes_essiv_cbc_encrypt ce_aes_essiv_cbc_encrypt
|
||||
#define aes_essiv_cbc_decrypt ce_aes_essiv_cbc_decrypt
|
||||
#define aes_ctr_encrypt ce_aes_ctr_encrypt
|
||||
#define aes_xctr_encrypt ce_aes_xctr_encrypt
|
||||
#define aes_xts_encrypt ce_aes_xts_encrypt
|
||||
#define aes_xts_decrypt ce_aes_xts_decrypt
|
||||
#define aes_mac_update ce_aes_mac_update
|
||||
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
|
||||
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS/XCTR using ARMv8 Crypto Extensions");
|
||||
#else
|
||||
#define MODE "neon"
|
||||
#define PRIO 200
|
||||
@ -50,16 +51,18 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
|
||||
#define aes_essiv_cbc_encrypt neon_aes_essiv_cbc_encrypt
|
||||
#define aes_essiv_cbc_decrypt neon_aes_essiv_cbc_decrypt
|
||||
#define aes_ctr_encrypt neon_aes_ctr_encrypt
|
||||
#define aes_xctr_encrypt neon_aes_xctr_encrypt
|
||||
#define aes_xts_encrypt neon_aes_xts_encrypt
|
||||
#define aes_xts_decrypt neon_aes_xts_decrypt
|
||||
#define aes_mac_update neon_aes_mac_update
|
||||
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
|
||||
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS/XCTR using ARMv8 NEON");
|
||||
#endif
|
||||
#if defined(USE_V8_CRYPTO_EXTENSIONS) || !IS_ENABLED(CONFIG_CRYPTO_AES_ARM64_BS)
|
||||
MODULE_ALIAS_CRYPTO("ecb(aes)");
|
||||
MODULE_ALIAS_CRYPTO("cbc(aes)");
|
||||
MODULE_ALIAS_CRYPTO("ctr(aes)");
|
||||
MODULE_ALIAS_CRYPTO("xts(aes)");
|
||||
MODULE_ALIAS_CRYPTO("xctr(aes)");
|
||||
#endif
|
||||
MODULE_ALIAS_CRYPTO("cts(cbc(aes))");
|
||||
MODULE_ALIAS_CRYPTO("essiv(cbc(aes),sha256)");
|
||||
@ -89,6 +92,9 @@ asmlinkage void aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
int rounds, int bytes, u8 ctr[]);
|
||||
|
||||
asmlinkage void aes_xctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
int rounds, int bytes, u8 ctr[], int byte_ctr);
|
||||
|
||||
asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
|
||||
int rounds, int bytes, u32 const rk2[], u8 iv[],
|
||||
int first);
|
||||
@ -442,6 +448,52 @@ static int __maybe_unused essiv_cbc_decrypt(struct skcipher_request *req)
|
||||
return err ?: cbc_decrypt_walk(req, &walk);
|
||||
}
|
||||
|
||||
static int __maybe_unused xctr_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
int err, rounds = 6 + ctx->key_length / 4;
|
||||
struct skcipher_walk walk;
|
||||
unsigned int byte_ctr = 0;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
while (walk.nbytes > 0) {
|
||||
const u8 *src = walk.src.virt.addr;
|
||||
unsigned int nbytes = walk.nbytes;
|
||||
u8 *dst = walk.dst.virt.addr;
|
||||
u8 buf[AES_BLOCK_SIZE];
|
||||
|
||||
/*
|
||||
* If given less than 16 bytes, we must copy the partial block
|
||||
* into a temporary buffer of 16 bytes to avoid out of bounds
|
||||
* reads and writes. Furthermore, this code is somewhat unusual
|
||||
* in that it expects the end of the data to be at the end of
|
||||
* the temporary buffer, rather than the start of the data at
|
||||
* the start of the temporary buffer.
|
||||
*/
|
||||
if (unlikely(nbytes < AES_BLOCK_SIZE))
|
||||
src = dst = memcpy(buf + sizeof(buf) - nbytes,
|
||||
src, nbytes);
|
||||
else if (nbytes < walk.total)
|
||||
nbytes &= ~(AES_BLOCK_SIZE - 1);
|
||||
|
||||
kernel_neon_begin();
|
||||
aes_xctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
|
||||
walk.iv, byte_ctr);
|
||||
kernel_neon_end();
|
||||
|
||||
if (unlikely(nbytes < AES_BLOCK_SIZE))
|
||||
memcpy(walk.dst.virt.addr,
|
||||
buf + sizeof(buf) - nbytes, nbytes);
|
||||
byte_ctr += nbytes;
|
||||
|
||||
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
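For reference (not part of the patch), XCTR as used by HCTR2 derives keystream block i, counting from 1, as E_K(IV XOR le128(i)): the counter is little-endian and XORed into the IV, so no byte swapping or carry propagation is needed. A minimal, non-optimized sketch, assuming a caller-supplied one-block primitive aes_encrypt_block():

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define BLOCK_SIZE 16

/* Hypothetical one-block AES primitive supplied by the caller. */
void aes_encrypt_block(const void *key, uint8_t dst[BLOCK_SIZE],
		       const uint8_t src[BLOCK_SIZE]);

/*
 * Reference XCTR: keystream block i (1-based) is E_K(IV ^ le128(i)),
 * XORed into the data. Partial final blocks just use a prefix of the
 * last keystream block.
 */
static void xctr_crypt_ref(const void *key, uint8_t *dst, const uint8_t *src,
			   size_t len, const uint8_t iv[BLOCK_SIZE])
{
	uint8_t ctrblk[BLOCK_SIZE], ks[BLOCK_SIZE];
	uint64_t i = 0;

	while (len) {
		size_t n = len < BLOCK_SIZE ? len : BLOCK_SIZE;
		uint64_t ctr = ++i;

		memcpy(ctrblk, iv, BLOCK_SIZE);
		/* XOR the little-endian 64-bit counter into the low bytes. */
		for (int b = 0; b < 8; b++)
			ctrblk[b] ^= (uint8_t)(ctr >> (8 * b));

		aes_encrypt_block(key, ks, ctrblk);
		for (size_t b = 0; b < n; b++)
			dst[b] = src[b] ^ ks[b];

		src += n;
		dst += n;
		len -= n;
	}
}

The byte_ctr plumbing in the kernel version above exists only so that a long request can be processed in several skcipher_walk chunks while still using the correct block indices.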
|
||||
|
||||
static int __maybe_unused ctr_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
@ -457,6 +509,14 @@ static int __maybe_unused ctr_encrypt(struct skcipher_request *req)
|
||||
u8 *dst = walk.dst.virt.addr;
|
||||
u8 buf[AES_BLOCK_SIZE];
|
||||
|
||||
/*
|
||||
* If given less than 16 bytes, we must copy the partial block
|
||||
* into a temporary buffer of 16 bytes to avoid out of bounds
|
||||
* reads and writes. Furthermore, this code is somewhat unusual
|
||||
* in that it expects the end of the data to be at the end of
|
||||
* the temporary buffer, rather than the start of the data at
|
||||
* the start of the temporary buffer.
|
||||
*/
|
||||
if (unlikely(nbytes < AES_BLOCK_SIZE))
|
||||
src = dst = memcpy(buf + sizeof(buf) - nbytes,
|
||||
src, nbytes);
|
||||
@ -669,6 +729,22 @@ static struct skcipher_alg aes_algs[] = { {
|
||||
.setkey = skcipher_aes_setkey,
|
||||
.encrypt = ctr_encrypt,
|
||||
.decrypt = ctr_encrypt,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "xctr(aes)",
|
||||
.cra_driver_name = "xctr-aes-" MODE,
|
||||
.cra_priority = PRIO,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.chunksize = AES_BLOCK_SIZE,
|
||||
.setkey = skcipher_aes_setkey,
|
||||
.encrypt = xctr_encrypt,
|
||||
.decrypt = xctr_encrypt,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "xts(aes)",
|
||||
|
@ -318,127 +318,211 @@ AES_FUNC_END(aes_cbc_cts_decrypt)
|
||||
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
|
||||
.previous
|
||||
|
||||
|
||||
/*
|
||||
* aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
|
||||
* int bytes, u8 ctr[])
|
||||
* This macro generates the code for CTR and XCTR mode.
|
||||
*/
|
||||
.macro ctr_encrypt xctr
|
||||
// Arguments
|
||||
OUT .req x0
|
||||
IN .req x1
|
||||
KEY .req x2
|
||||
ROUNDS_W .req w3
|
||||
BYTES_W .req w4
|
||||
IV .req x5
|
||||
BYTE_CTR_W .req w6 // XCTR only
|
||||
// Intermediate values
|
||||
CTR_W .req w11 // XCTR only
|
||||
CTR .req x11 // XCTR only
|
||||
IV_PART .req x12
|
||||
BLOCKS .req x13
|
||||
BLOCKS_W .req w13
|
||||
|
||||
AES_FUNC_START(aes_ctr_encrypt)
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
|
||||
enc_prepare w3, x2, x12
|
||||
ld1 {vctr.16b}, [x5]
|
||||
enc_prepare ROUNDS_W, KEY, IV_PART
|
||||
ld1 {vctr.16b}, [IV]
|
||||
|
||||
umov x12, vctr.d[1] /* keep swabbed ctr in reg */
|
||||
rev x12, x12
|
||||
/*
|
||||
* Keep 64 bits of the IV in a register. For CTR mode this lets us
|
||||
* easily increment the IV. For XCTR mode this lets us efficiently XOR
|
||||
* the 64-bit counter with the IV.
|
||||
*/
|
||||
.if \xctr
|
||||
umov IV_PART, vctr.d[0]
|
||||
lsr CTR_W, BYTE_CTR_W, #4
|
||||
.else
|
||||
umov IV_PART, vctr.d[1]
|
||||
rev IV_PART, IV_PART
|
||||
.endif
|
||||
|
||||
.LctrloopNx:
|
||||
add w7, w4, #15
|
||||
sub w4, w4, #MAX_STRIDE << 4
|
||||
lsr w7, w7, #4
|
||||
.LctrloopNx\xctr:
|
||||
add BLOCKS_W, BYTES_W, #15
|
||||
sub BYTES_W, BYTES_W, #MAX_STRIDE << 4
|
||||
lsr BLOCKS_W, BLOCKS_W, #4
|
||||
mov w8, #MAX_STRIDE
|
||||
cmp w7, w8
|
||||
csel w7, w7, w8, lt
|
||||
adds x12, x12, x7
|
||||
cmp BLOCKS_W, w8
|
||||
csel BLOCKS_W, BLOCKS_W, w8, lt
|
||||
|
||||
/*
|
||||
* Set up the counter values in v0-v{MAX_STRIDE-1}.
|
||||
*
|
||||
* If we are encrypting less than MAX_STRIDE blocks, the tail block
|
||||
* handling code expects the last keystream block to be in
|
||||
* v{MAX_STRIDE-1}. For example: if encrypting two blocks with
|
||||
* MAX_STRIDE=5, then v3 and v4 should have the next two counter blocks.
|
||||
*/
|
||||
.if \xctr
|
||||
add CTR, CTR, BLOCKS
|
||||
.else
|
||||
adds IV_PART, IV_PART, BLOCKS
|
||||
.endif
|
||||
mov v0.16b, vctr.16b
|
||||
mov v1.16b, vctr.16b
|
||||
mov v2.16b, vctr.16b
|
||||
mov v3.16b, vctr.16b
|
||||
ST5( mov v4.16b, vctr.16b )
|
||||
bcs 0f
|
||||
.if \xctr
|
||||
sub x6, CTR, #MAX_STRIDE - 1
|
||||
sub x7, CTR, #MAX_STRIDE - 2
|
||||
sub x8, CTR, #MAX_STRIDE - 3
|
||||
sub x9, CTR, #MAX_STRIDE - 4
|
||||
ST5( sub x10, CTR, #MAX_STRIDE - 5 )
|
||||
eor x6, x6, IV_PART
|
||||
eor x7, x7, IV_PART
|
||||
eor x8, x8, IV_PART
|
||||
eor x9, x9, IV_PART
|
||||
ST5( eor x10, x10, IV_PART )
|
||||
mov v0.d[0], x6
|
||||
mov v1.d[0], x7
|
||||
mov v2.d[0], x8
|
||||
mov v3.d[0], x9
|
||||
ST5( mov v4.d[0], x10 )
|
||||
.else
|
||||
bcs 0f
|
||||
.subsection 1
|
||||
/*
|
||||
* This subsection handles carries.
|
||||
*
|
||||
* Conditional branching here is allowed with respect to time
|
||||
* invariance since the branches are dependent on the IV instead
|
||||
* of the plaintext or key. This code is rarely executed in
|
||||
* practice anyway.
|
||||
*/
|
||||
|
||||
.subsection 1
|
||||
/* apply carry to outgoing counter */
|
||||
0: umov x8, vctr.d[0]
|
||||
rev x8, x8
|
||||
add x8, x8, #1
|
||||
rev x8, x8
|
||||
ins vctr.d[0], x8
|
||||
/* Apply carry to outgoing counter. */
|
||||
0: umov x8, vctr.d[0]
|
||||
rev x8, x8
|
||||
add x8, x8, #1
|
||||
rev x8, x8
|
||||
ins vctr.d[0], x8
|
||||
|
||||
/* apply carry to N counter blocks for N := x12 */
|
||||
cbz x12, 2f
|
||||
adr x16, 1f
|
||||
sub x16, x16, x12, lsl #3
|
||||
br x16
|
||||
bti c
|
||||
mov v0.d[0], vctr.d[0]
|
||||
bti c
|
||||
mov v1.d[0], vctr.d[0]
|
||||
bti c
|
||||
mov v2.d[0], vctr.d[0]
|
||||
bti c
|
||||
mov v3.d[0], vctr.d[0]
|
||||
ST5( bti c )
|
||||
ST5( mov v4.d[0], vctr.d[0] )
|
||||
1: b 2f
|
||||
.previous
|
||||
/*
|
||||
* Apply carry to counter blocks if needed.
|
||||
*
|
||||
* Since the carry flag was set, we know 0 <= IV_PART <
|
||||
* MAX_STRIDE. Using the value of IV_PART we can determine how
|
||||
* many counter blocks need to be updated.
|
||||
*/
|
||||
cbz IV_PART, 2f
|
||||
adr x16, 1f
|
||||
sub x16, x16, IV_PART, lsl #3
|
||||
br x16
|
||||
bti c
|
||||
mov v0.d[0], vctr.d[0]
|
||||
bti c
|
||||
mov v1.d[0], vctr.d[0]
|
||||
bti c
|
||||
mov v2.d[0], vctr.d[0]
|
||||
bti c
|
||||
mov v3.d[0], vctr.d[0]
|
||||
ST5( bti c )
|
||||
ST5( mov v4.d[0], vctr.d[0] )
|
||||
1: b 2f
|
||||
.previous
|
||||
|
||||
2: rev x7, x12
|
||||
ins vctr.d[1], x7
|
||||
sub x7, x12, #MAX_STRIDE - 1
|
||||
sub x8, x12, #MAX_STRIDE - 2
|
||||
sub x9, x12, #MAX_STRIDE - 3
|
||||
rev x7, x7
|
||||
rev x8, x8
|
||||
mov v1.d[1], x7
|
||||
rev x9, x9
|
||||
ST5( sub x10, x12, #MAX_STRIDE - 4 )
|
||||
mov v2.d[1], x8
|
||||
ST5( rev x10, x10 )
|
||||
mov v3.d[1], x9
|
||||
ST5( mov v4.d[1], x10 )
|
||||
tbnz w4, #31, .Lctrtail
|
||||
ld1 {v5.16b-v7.16b}, [x1], #48
|
||||
2: rev x7, IV_PART
|
||||
ins vctr.d[1], x7
|
||||
sub x7, IV_PART, #MAX_STRIDE - 1
|
||||
sub x8, IV_PART, #MAX_STRIDE - 2
|
||||
sub x9, IV_PART, #MAX_STRIDE - 3
|
||||
rev x7, x7
|
||||
rev x8, x8
|
||||
mov v1.d[1], x7
|
||||
rev x9, x9
|
||||
ST5( sub x10, IV_PART, #MAX_STRIDE - 4 )
|
||||
mov v2.d[1], x8
|
||||
ST5( rev x10, x10 )
|
||||
mov v3.d[1], x9
|
||||
ST5( mov v4.d[1], x10 )
|
||||
.endif
|
||||
|
||||
/*
|
||||
* If there are at least MAX_STRIDE blocks left, XOR the data with
|
||||
* keystream and store. Otherwise jump to tail handling.
|
||||
*/
|
||||
tbnz BYTES_W, #31, .Lctrtail\xctr
|
||||
ld1 {v5.16b-v7.16b}, [IN], #48
|
||||
ST4( bl aes_encrypt_block4x )
|
||||
ST5( bl aes_encrypt_block5x )
|
||||
eor v0.16b, v5.16b, v0.16b
|
||||
ST4( ld1 {v5.16b}, [x1], #16 )
|
||||
ST4( ld1 {v5.16b}, [IN], #16 )
|
||||
eor v1.16b, v6.16b, v1.16b
|
||||
ST5( ld1 {v5.16b-v6.16b}, [x1], #32 )
|
||||
ST5( ld1 {v5.16b-v6.16b}, [IN], #32 )
|
||||
eor v2.16b, v7.16b, v2.16b
|
||||
eor v3.16b, v5.16b, v3.16b
|
||||
ST5( eor v4.16b, v6.16b, v4.16b )
|
||||
st1 {v0.16b-v3.16b}, [x0], #64
|
||||
ST5( st1 {v4.16b}, [x0], #16 )
|
||||
cbz w4, .Lctrout
|
||||
b .LctrloopNx
|
||||
st1 {v0.16b-v3.16b}, [OUT], #64
|
||||
ST5( st1 {v4.16b}, [OUT], #16 )
|
||||
cbz BYTES_W, .Lctrout\xctr
|
||||
b .LctrloopNx\xctr
|
||||
|
||||
.Lctrout:
|
||||
st1 {vctr.16b}, [x5] /* return next CTR value */
|
||||
.Lctrout\xctr:
|
||||
.if !\xctr
|
||||
st1 {vctr.16b}, [IV] /* return next CTR value */
|
||||
.endif
|
||||
ldp x29, x30, [sp], #16
|
||||
ret
|
||||
|
||||
.Lctrtail:
|
||||
/* XOR up to MAX_STRIDE * 16 - 1 bytes of in/output with v0 ... v3/v4 */
|
||||
.Lctrtail\xctr:
|
||||
/*
|
||||
* Handle up to MAX_STRIDE * 16 - 1 bytes of plaintext
|
||||
*
|
||||
* This code expects the last keystream block to be in v{MAX_STRIDE-1}.
|
||||
* For example: if encrypting two blocks with MAX_STRIDE=5, then v3 and
|
||||
* v4 should have the next two counter blocks.
|
||||
*
|
||||
* This allows us to store the ciphertext by writing to overlapping
|
||||
* regions of memory. Any invalid ciphertext blocks get overwritten by
|
||||
* correctly computed blocks. This approach greatly simplifies the
|
||||
* logic for storing the ciphertext.
|
||||
*/
|
||||
mov x16, #16
|
||||
ands x6, x4, #0xf
|
||||
csel x13, x6, x16, ne
|
||||
ands w7, BYTES_W, #0xf
|
||||
csel x13, x7, x16, ne
|
||||
|
||||
ST5( cmp w4, #64 - (MAX_STRIDE << 4) )
|
||||
ST5( cmp BYTES_W, #64 - (MAX_STRIDE << 4))
|
||||
ST5( csel x14, x16, xzr, gt )
|
||||
cmp w4, #48 - (MAX_STRIDE << 4)
|
||||
cmp BYTES_W, #48 - (MAX_STRIDE << 4)
|
||||
csel x15, x16, xzr, gt
|
||||
cmp w4, #32 - (MAX_STRIDE << 4)
|
||||
cmp BYTES_W, #32 - (MAX_STRIDE << 4)
|
||||
csel x16, x16, xzr, gt
|
||||
cmp w4, #16 - (MAX_STRIDE << 4)
|
||||
cmp BYTES_W, #16 - (MAX_STRIDE << 4)
|
||||
|
||||
adr_l x12, .Lcts_permute_table
|
||||
add x12, x12, x13
|
||||
ble .Lctrtail1x
|
||||
adr_l x9, .Lcts_permute_table
|
||||
add x9, x9, x13
|
||||
ble .Lctrtail1x\xctr
|
||||
|
||||
ST5( ld1 {v5.16b}, [x1], x14 )
|
||||
ld1 {v6.16b}, [x1], x15
|
||||
ld1 {v7.16b}, [x1], x16
|
||||
ST5( ld1 {v5.16b}, [IN], x14 )
|
||||
ld1 {v6.16b}, [IN], x15
|
||||
ld1 {v7.16b}, [IN], x16
|
||||
|
||||
ST4( bl aes_encrypt_block4x )
|
||||
ST5( bl aes_encrypt_block5x )
|
||||
|
||||
ld1 {v8.16b}, [x1], x13
|
||||
ld1 {v9.16b}, [x1]
|
||||
ld1 {v10.16b}, [x12]
|
||||
ld1 {v8.16b}, [IN], x13
|
||||
ld1 {v9.16b}, [IN]
|
||||
ld1 {v10.16b}, [x9]
|
||||
|
||||
ST4( eor v6.16b, v6.16b, v0.16b )
|
||||
ST4( eor v7.16b, v7.16b, v1.16b )
|
||||
@ -453,32 +537,91 @@ ST5( eor v7.16b, v7.16b, v2.16b )
|
||||
ST5( eor v8.16b, v8.16b, v3.16b )
|
||||
ST5( eor v9.16b, v9.16b, v4.16b )
|
||||
|
||||
ST5( st1 {v5.16b}, [x0], x14 )
|
||||
st1 {v6.16b}, [x0], x15
|
||||
st1 {v7.16b}, [x0], x16
|
||||
add x13, x13, x0
|
||||
ST5( st1 {v5.16b}, [OUT], x14 )
|
||||
st1 {v6.16b}, [OUT], x15
|
||||
st1 {v7.16b}, [OUT], x16
|
||||
add x13, x13, OUT
|
||||
st1 {v9.16b}, [x13] // overlapping stores
|
||||
st1 {v8.16b}, [x0]
|
||||
b .Lctrout
|
||||
st1 {v8.16b}, [OUT]
|
||||
b .Lctrout\xctr
|
||||
|
||||
.Lctrtail1x:
|
||||
sub x7, x6, #16
|
||||
csel x6, x6, x7, eq
|
||||
add x1, x1, x6
|
||||
add x0, x0, x6
|
||||
ld1 {v5.16b}, [x1]
|
||||
ld1 {v6.16b}, [x0]
|
||||
.Lctrtail1x\xctr:
|
||||
/*
|
||||
* Handle <= 16 bytes of plaintext
|
||||
*
|
||||
* This code always reads and writes 16 bytes. To avoid out of bounds
|
||||
* accesses, XCTR and CTR modes must use a temporary buffer when
|
||||
* encrypting/decrypting less than 16 bytes.
|
||||
*
|
||||
* This code is unusual in that it loads the input and stores the output
|
||||
* relative to the end of the buffers rather than relative to the start.
|
||||
* This causes unusual behaviour when encrypting/decrypting less than 16
|
||||
* bytes; the end of the data is expected to be at the end of the
|
||||
* temporary buffer rather than the start of the data being at the start
|
||||
* of the temporary buffer.
|
||||
*/
|
||||
sub x8, x7, #16
|
||||
csel x7, x7, x8, eq
|
||||
add IN, IN, x7
|
||||
add OUT, OUT, x7
|
||||
ld1 {v5.16b}, [IN]
|
||||
ld1 {v6.16b}, [OUT]
|
||||
ST5( mov v3.16b, v4.16b )
|
||||
encrypt_block v3, w3, x2, x8, w7
|
||||
ld1 {v10.16b-v11.16b}, [x12]
|
||||
encrypt_block v3, ROUNDS_W, KEY, x8, w7
|
||||
ld1 {v10.16b-v11.16b}, [x9]
|
||||
tbl v3.16b, {v3.16b}, v10.16b
|
||||
sshr v11.16b, v11.16b, #7
|
||||
eor v5.16b, v5.16b, v3.16b
|
||||
bif v5.16b, v6.16b, v11.16b
|
||||
st1 {v5.16b}, [x0]
|
||||
b .Lctrout
|
||||
st1 {v5.16b}, [OUT]
|
||||
b .Lctrout\xctr
|
||||
|
||||
// Arguments
|
||||
.unreq OUT
|
||||
.unreq IN
|
||||
.unreq KEY
|
||||
.unreq ROUNDS_W
|
||||
.unreq BYTES_W
|
||||
.unreq IV
|
||||
.unreq BYTE_CTR_W // XCTR only
|
||||
// Intermediate values
|
||||
.unreq CTR_W // XCTR only
|
||||
.unreq CTR // XCTR only
|
||||
.unreq IV_PART
|
||||
.unreq BLOCKS
|
||||
.unreq BLOCKS_W
|
||||
.endm
|
||||
|
||||
/*
|
||||
* aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
|
||||
* int bytes, u8 ctr[])
|
||||
*
|
||||
* The input and output buffers must always be at least 16 bytes even if
|
||||
* encrypting/decrypting less than 16 bytes. Otherwise out of bounds
|
||||
* accesses will occur. The data to be encrypted/decrypted is expected
|
||||
* to be at the end of this 16-byte temporary buffer rather than the
|
||||
* start.
|
||||
*/
|
||||
|
||||
AES_FUNC_START(aes_ctr_encrypt)
|
||||
ctr_encrypt 0
|
||||
AES_FUNC_END(aes_ctr_encrypt)
|
||||
|
||||
/*
|
||||
* aes_xctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
|
||||
* int bytes, u8 const iv[], int byte_ctr)
|
||||
*
|
||||
* The input and output buffers must always be at least 16 bytes even if
|
||||
* encrypting/decrypting less than 16 bytes. Otherwise out of bounds
|
||||
* accesses will occur. The data to be encrypted/decrypted is expected
|
||||
* to be at the end of this 16-byte temporary buffer rather than the
|
||||
* start.
|
||||
*/
|
||||
|
||||
AES_FUNC_START(aes_xctr_encrypt)
|
||||
ctr_encrypt 1
|
||||
AES_FUNC_END(aes_xctr_encrypt)
|
||||
|
||||
|
||||
/*
|
||||
* aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
|
||||
|
arch/arm64/crypto/polyval-ce-core.S (new file, 361 lines)
@@ -0,0 +1,361 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Implementation of POLYVAL using ARMv8 Crypto Extensions.
 *
 * Copyright 2021 Google LLC
 */
/*
 * This is an efficient implementation of POLYVAL using ARMv8 Crypto Extensions.
 * It works on 8 blocks at a time, by precomputing the first 8 key powers h^8,
 * ..., h^1 in the POLYVAL finite field. This precomputation allows us to split
 * finite field multiplication into two steps.
 *
 * In the first step, we consider h^i, m_i as normal polynomials of degree less
 * than 128. We then compute p(x) = h^8m_0 + ... + h^1m_7 where multiplication
 * is simply polynomial multiplication.
 *
 * In the second step, we compute the reduction of p(x) modulo the finite field
 * modulus g(x) = x^128 + x^127 + x^126 + x^121 + 1.
 *
 * This two step process is equivalent to computing h^8m_0 + ... + h^1m_7 where
 * multiplication is finite field multiplication. The advantage is that the
 * two-step process only requires 1 finite field reduction for every 8
 * polynomial multiplications. Further parallelism is gained by interleaving the
 * multiplications and polynomial reductions.
 */

#include <linux/linkage.h>
|
||||
#define STRIDE_BLOCKS 8
|
||||
|
||||
KEY_POWERS .req x0
|
||||
MSG .req x1
|
||||
BLOCKS_LEFT .req x2
|
||||
ACCUMULATOR .req x3
|
||||
KEY_START .req x10
|
||||
EXTRA_BYTES .req x11
|
||||
TMP .req x13
|
||||
|
||||
M0 .req v0
|
||||
M1 .req v1
|
||||
M2 .req v2
|
||||
M3 .req v3
|
||||
M4 .req v4
|
||||
M5 .req v5
|
||||
M6 .req v6
|
||||
M7 .req v7
|
||||
KEY8 .req v8
|
||||
KEY7 .req v9
|
||||
KEY6 .req v10
|
||||
KEY5 .req v11
|
||||
KEY4 .req v12
|
||||
KEY3 .req v13
|
||||
KEY2 .req v14
|
||||
KEY1 .req v15
|
||||
PL .req v16
|
||||
PH .req v17
|
||||
TMP_V .req v18
|
||||
LO .req v20
|
||||
MI .req v21
|
||||
HI .req v22
|
||||
SUM .req v23
|
||||
GSTAR .req v24
|
||||
|
||||
.text
|
||||
|
||||
.arch armv8-a+crypto
|
||||
.align 4
|
||||
|
||||
.Lgstar:
|
||||
.quad 0xc200000000000000, 0xc200000000000000
|
||||
|
||||
/*
|
||||
* Computes the product of two 128-bit polynomials in X and Y and XORs the
|
||||
* components of the 256-bit product into LO, MI, HI.
|
||||
*
|
||||
* Given:
|
||||
* X = [X_1 : X_0]
|
||||
* Y = [Y_1 : Y_0]
|
||||
*
|
||||
* We compute:
|
||||
* LO += X_0 * Y_0
|
||||
* MI += (X_0 + X_1) * (Y_0 + Y_1)
|
||||
* HI += X_1 * Y_1
|
||||
*
|
||||
* Later, the 256-bit result can be extracted as:
|
||||
* [HI_1 : HI_0 + HI_1 + MI_1 + LO_1 : LO_1 + HI_0 + MI_0 + LO_0 : LO_0]
|
||||
* This step is done when computing the polynomial reduction for efficiency
|
||||
* reasons.
|
||||
*
|
||||
* Karatsuba multiplication is used instead of Schoolbook multiplication because
|
||||
* it was found to be slightly faster on ARM64 CPUs.
|
||||
*
|
||||
*/
|
||||
.macro karatsuba1 X Y
|
||||
X .req \X
|
||||
Y .req \Y
|
||||
ext v25.16b, X.16b, X.16b, #8
|
||||
ext v26.16b, Y.16b, Y.16b, #8
|
||||
eor v25.16b, v25.16b, X.16b
|
||||
eor v26.16b, v26.16b, Y.16b
|
||||
pmull2 v28.1q, X.2d, Y.2d
|
||||
pmull v29.1q, X.1d, Y.1d
|
||||
pmull v27.1q, v25.1d, v26.1d
|
||||
eor HI.16b, HI.16b, v28.16b
|
||||
eor LO.16b, LO.16b, v29.16b
|
||||
eor MI.16b, MI.16b, v27.16b
|
||||
.unreq X
|
||||
.unreq Y
|
||||
.endm
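The identity behind this macro is one-level Karatsuba multiplication: the cross term X_0*Y_1 + X_1*Y_0 is recovered as MI - LO - HI, which in GF(2) collapses to MI XOR LO XOR HI. A small integer sketch of the same identity (illustrative only, not from this file), using 16-bit halves so nothing overflows:

#include <stdint.h>

/*
 * Illustrative only: the same three-multiplication Karatsuba identity that
 * karatsuba1/karatsuba2 use for 128-bit GF(2) polynomials split into 64-bit
 * halves, shown here for ordinary 32-bit integers split into 16-bit halves.
 * In GF(2) the subtractions become XORs and there are no carries.
 */
static uint64_t karatsuba_mul32(uint32_t x, uint32_t y)
{
	uint64_t x0 = x & 0xffff, x1 = x >> 16;
	uint64_t y0 = y & 0xffff, y1 = y >> 16;

	uint64_t lo = x0 * y0;			/* LO = X_0 * Y_0           */
	uint64_t hi = x1 * y1;			/* HI = X_1 * Y_1           */
	uint64_t mi = (x0 + x1) * (y0 + y1);	/* MI = (X_0+X_1)*(Y_0+Y_1) */

	/* Cross term X_0*Y_1 + X_1*Y_0 = MI - HI - LO. */
	return (hi << 32) + ((mi - hi - lo) << 16) + lo;
}

karatsuba2 below performs the analogous recombination for the GF(2) case, where the shifts become ext instructions and the additions become eor.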
|
||||
|
||||
/*
|
||||
* Same as karatsuba1, except overwrites HI, LO, MI rather than XORing into
|
||||
* them.
|
||||
*/
|
||||
.macro karatsuba1_store X Y
|
||||
X .req \X
|
||||
Y .req \Y
|
||||
ext v25.16b, X.16b, X.16b, #8
|
||||
ext v26.16b, Y.16b, Y.16b, #8
|
||||
eor v25.16b, v25.16b, X.16b
|
||||
eor v26.16b, v26.16b, Y.16b
|
||||
pmull2 HI.1q, X.2d, Y.2d
|
||||
pmull LO.1q, X.1d, Y.1d
|
||||
pmull MI.1q, v25.1d, v26.1d
|
||||
.unreq X
|
||||
.unreq Y
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Computes the 256-bit polynomial represented by LO, HI, MI. Stores
|
||||
* the result in PL, PH.
|
||||
* [PH : PL] =
|
||||
* [HI_1 : HI_1 + HI_0 + MI_1 + LO_1 : HI_0 + MI_0 + LO_1 + LO_0 : LO_0]
|
||||
*/
|
||||
.macro karatsuba2
|
||||
// v4 = [HI_1 + MI_1 : HI_0 + MI_0]
|
||||
eor v4.16b, HI.16b, MI.16b
|
||||
// v4 = [HI_1 + MI_1 + LO_1 : HI_0 + MI_0 + LO_0]
|
||||
eor v4.16b, v4.16b, LO.16b
|
||||
// v5 = [HI_0 : LO_1]
|
||||
ext v5.16b, LO.16b, HI.16b, #8
|
||||
// v4 = [HI_1 + HI_0 + MI_1 + LO_1 : HI_0 + MI_0 + LO_1 + LO_0]
|
||||
eor v4.16b, v4.16b, v5.16b
|
||||
// HI = [HI_0 : HI_1]
|
||||
ext HI.16b, HI.16b, HI.16b, #8
|
||||
// LO = [LO_0 : LO_1]
|
||||
ext LO.16b, LO.16b, LO.16b, #8
|
||||
// PH = [HI_1 : HI_1 + HI_0 + MI_1 + LO_1]
|
||||
ext PH.16b, v4.16b, HI.16b, #8
|
||||
// PL = [HI_0 + MI_0 + LO_1 + LO_0 : LO_0]
|
||||
ext PL.16b, LO.16b, v4.16b, #8
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Computes the 128-bit reduction of PH : PL. Stores the result in dest.
|
||||
*
|
||||
* This macro computes p(x) mod g(x) where p(x) is in montgomery form and g(x) =
|
||||
* x^128 + x^127 + x^126 + x^121 + 1.
|
||||
*
|
||||
* We have a 256-bit polynomial PH : PL = P_3 : P_2 : P_1 : P_0 that is the
|
||||
* product of two 128-bit polynomials in Montgomery form. We need to reduce it
|
||||
* mod g(x). Also, since polynomials in Montgomery form have an "extra" factor
|
||||
* of x^128, this product has two extra factors of x^128. To get it back into
|
||||
* Montgomery form, we need to remove one of these factors by dividing by x^128.
|
||||
*
|
||||
* To accomplish both of these goals, we add multiples of g(x) that cancel out
|
||||
* the low 128 bits P_1 : P_0, leaving just the high 128 bits. Since the low
|
||||
* bits are zero, the polynomial division by x^128 can be done by right
|
||||
* shifting.
|
||||
*
|
||||
* Since the only nonzero term in the low 64 bits of g(x) is the constant term,
|
||||
* the multiple of g(x) needed to cancel out P_0 is P_0 * g(x). The CPU can
|
||||
* only do 64x64 bit multiplications, so split P_0 * g(x) into x^128 * P_0 +
|
||||
* x^64 * g*(x) * P_0 + P_0, where g*(x) is bits 64-127 of g(x). Adding this to
|
||||
* the original polynomial gives P_3 : P_2 + P_0 + T_1 : P_1 + T_0 : 0, where T
|
||||
* = T_1 : T_0 = g*(x) * P_0. Thus, bits 0-63 got "folded" into bits 64-191.
|
||||
*
|
||||
* Repeating this same process on the next 64 bits "folds" bits 64-127 into bits
|
||||
* 128-255, giving the answer in bits 128-255. This time, we need to cancel P_1
|
||||
* + T_0 in bits 64-127. The multiple of g(x) required is (P_1 + T_0) * g(x) *
|
||||
* x^64. Adding this to our previous computation gives P_3 + P_1 + T_0 + V_1 :
|
||||
* P_2 + P_0 + T_1 + V_0 : 0 : 0, where V = V_1 : V_0 = g*(x) * (P_1 + T_0).
|
||||
*
|
||||
* So our final computation is:
|
||||
* T = T_1 : T_0 = g*(x) * P_0
|
||||
* V = V_1 : V_0 = g*(x) * (P_1 + T_0)
|
||||
* p(x) / x^{128} mod g(x) = P_3 + P_1 + T_0 + V_1 : P_2 + P_0 + T_1 + V_0
|
||||
*
|
||||
* The implementation below saves a XOR instruction by computing P_1 + T_0 : P_0
|
||||
* + T_1 and XORing into dest, rather than separately XORing P_1 : P_0 and T_0 :
|
||||
* T_1 into dest. This allows us to reuse P_1 + T_0 when computing V.
|
||||
*/
|
||||
.macro montgomery_reduction dest
|
||||
DEST .req \dest
|
||||
// TMP_V = T_1 : T_0 = P_0 * g*(x)
|
||||
pmull TMP_V.1q, PL.1d, GSTAR.1d
|
||||
// TMP_V = T_0 : T_1
|
||||
ext TMP_V.16b, TMP_V.16b, TMP_V.16b, #8
|
||||
// TMP_V = P_1 + T_0 : P_0 + T_1
|
||||
eor TMP_V.16b, PL.16b, TMP_V.16b
|
||||
// PH = P_3 + P_1 + T_0 : P_2 + P_0 + T_1
|
||||
eor PH.16b, PH.16b, TMP_V.16b
|
||||
// TMP_V = V_1 : V_0 = (P_1 + T_0) * g*(x)
|
||||
pmull2 TMP_V.1q, TMP_V.2d, GSTAR.2d
|
||||
eor DEST.16b, PH.16b, TMP_V.16b
|
||||
.unreq DEST
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Compute Polyval on 8 blocks.
|
||||
*
|
||||
* If reduce is set, also computes the montgomery reduction of the
|
||||
* previous full_stride call and XORs with the first message block.
|
||||
* (m_0 + REDUCE(PL, PH))h^8 + ... + m_7h^1.
|
||||
* I.e., the first multiplication uses m_0 + REDUCE(PL, PH) instead of m_0.
|
||||
*
|
||||
* Sets PL, PH.
|
||||
*/
|
||||
.macro full_stride reduce
|
||||
eor LO.16b, LO.16b, LO.16b
|
||||
eor MI.16b, MI.16b, MI.16b
|
||||
eor HI.16b, HI.16b, HI.16b
|
||||
|
||||
ld1 {M0.16b, M1.16b, M2.16b, M3.16b}, [MSG], #64
|
||||
ld1 {M4.16b, M5.16b, M6.16b, M7.16b}, [MSG], #64
|
||||
|
||||
karatsuba1 M7 KEY1
|
||||
.if \reduce
|
||||
pmull TMP_V.1q, PL.1d, GSTAR.1d
|
||||
.endif
|
||||
|
||||
karatsuba1 M6 KEY2
|
||||
.if \reduce
|
||||
ext TMP_V.16b, TMP_V.16b, TMP_V.16b, #8
|
||||
.endif
|
||||
|
||||
karatsuba1 M5 KEY3
|
||||
.if \reduce
|
||||
eor TMP_V.16b, PL.16b, TMP_V.16b
|
||||
.endif
|
||||
|
||||
karatsuba1 M4 KEY4
|
||||
.if \reduce
|
||||
eor PH.16b, PH.16b, TMP_V.16b
|
||||
.endif
|
||||
|
||||
karatsuba1 M3 KEY5
|
||||
.if \reduce
|
||||
pmull2 TMP_V.1q, TMP_V.2d, GSTAR.2d
|
||||
.endif
|
||||
|
||||
karatsuba1 M2 KEY6
|
||||
.if \reduce
|
||||
eor SUM.16b, PH.16b, TMP_V.16b
|
||||
.endif
|
||||
|
||||
karatsuba1 M1 KEY7
|
||||
eor M0.16b, M0.16b, SUM.16b
|
||||
|
||||
karatsuba1 M0 KEY8
|
||||
karatsuba2
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Handle any extra blocks after full_stride loop.
|
||||
*/
|
||||
.macro partial_stride
|
||||
add KEY_POWERS, KEY_START, #(STRIDE_BLOCKS << 4)
|
||||
sub KEY_POWERS, KEY_POWERS, BLOCKS_LEFT, lsl #4
|
||||
ld1 {KEY1.16b}, [KEY_POWERS], #16
|
||||
|
||||
ld1 {TMP_V.16b}, [MSG], #16
|
||||
eor SUM.16b, SUM.16b, TMP_V.16b
|
||||
karatsuba1_store KEY1 SUM
|
||||
sub BLOCKS_LEFT, BLOCKS_LEFT, #1
|
||||
|
||||
tst BLOCKS_LEFT, #4
|
||||
beq .Lpartial4BlocksDone
|
||||
ld1 {M0.16b, M1.16b, M2.16b, M3.16b}, [MSG], #64
|
||||
ld1 {KEY8.16b, KEY7.16b, KEY6.16b, KEY5.16b}, [KEY_POWERS], #64
|
||||
karatsuba1 M0 KEY8
|
||||
karatsuba1 M1 KEY7
|
||||
karatsuba1 M2 KEY6
|
||||
karatsuba1 M3 KEY5
|
||||
.Lpartial4BlocksDone:
|
||||
tst BLOCKS_LEFT, #2
|
||||
beq .Lpartial2BlocksDone
|
||||
ld1 {M0.16b, M1.16b}, [MSG], #32
|
||||
ld1 {KEY8.16b, KEY7.16b}, [KEY_POWERS], #32
|
||||
karatsuba1 M0 KEY8
|
||||
karatsuba1 M1 KEY7
|
||||
.Lpartial2BlocksDone:
|
||||
tst BLOCKS_LEFT, #1
|
||||
beq .LpartialDone
|
||||
ld1 {M0.16b}, [MSG], #16
|
||||
ld1 {KEY8.16b}, [KEY_POWERS], #16
|
||||
karatsuba1 M0 KEY8
|
||||
.LpartialDone:
|
||||
karatsuba2
|
||||
montgomery_reduction SUM
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Perform montgomery multiplication in GF(2^128) and store result in op1.
|
||||
*
|
||||
* Computes op1*op2*x^{-128} mod x^128 + x^127 + x^126 + x^121 + 1
|
||||
* If op1, op2 are in montgomery form, this computes the montgomery
|
||||
* form of op1*op2.
|
||||
*
|
||||
* void pmull_polyval_mul(u8 *op1, const u8 *op2);
|
||||
*/
|
||||
SYM_FUNC_START(pmull_polyval_mul)
|
||||
adr TMP, .Lgstar
|
||||
ld1 {GSTAR.2d}, [TMP]
|
||||
ld1 {v0.16b}, [x0]
|
||||
ld1 {v1.16b}, [x1]
|
||||
karatsuba1_store v0 v1
|
||||
karatsuba2
|
||||
montgomery_reduction SUM
|
||||
st1 {SUM.16b}, [x0]
|
||||
ret
|
||||
SYM_FUNC_END(pmull_polyval_mul)
|
||||
|
||||
/*
|
||||
* Perform polynomial evaluation as specified by POLYVAL. This computes:
|
||||
* h^n * accumulator + h^n * m_0 + ... + h^1 * m_{n-1}
|
||||
* where n=nblocks, h is the hash key, and m_i are the message blocks.
|
||||
*
|
||||
* x0 - pointer to precomputed key powers h^8 ... h^1
|
||||
* x1 - pointer to message blocks
|
||||
* x2 - number of blocks to hash
|
||||
* x3 - pointer to accumulator
|
||||
*
|
||||
* void pmull_polyval_update(const struct polyval_ctx *ctx, const u8 *in,
|
||||
* size_t nblocks, u8 *accumulator);
|
||||
*/
|
||||
SYM_FUNC_START(pmull_polyval_update)
|
||||
adr TMP, .Lgstar
|
||||
mov KEY_START, KEY_POWERS
|
||||
ld1 {GSTAR.2d}, [TMP]
|
||||
ld1 {SUM.16b}, [ACCUMULATOR]
|
||||
subs BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
|
||||
blt .LstrideLoopExit
|
||||
ld1 {KEY8.16b, KEY7.16b, KEY6.16b, KEY5.16b}, [KEY_POWERS], #64
|
||||
ld1 {KEY4.16b, KEY3.16b, KEY2.16b, KEY1.16b}, [KEY_POWERS], #64
|
||||
full_stride 0
|
||||
subs BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
|
||||
blt .LstrideLoopExitReduce
|
||||
.LstrideLoop:
|
||||
full_stride 1
|
||||
subs BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
|
||||
bge .LstrideLoop
|
||||
.LstrideLoopExitReduce:
|
||||
montgomery_reduction SUM
|
||||
.LstrideLoopExit:
|
||||
adds BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
|
||||
beq .LskipPartial
|
||||
partial_stride
|
||||
.LskipPartial:
|
||||
st1 {SUM.16b}, [ACCUMULATOR]
|
||||
ret
|
||||
SYM_FUNC_END(pmull_polyval_update)
|
arch/arm64/crypto/polyval-ce-glue.c (new file, 191 lines)

@@ -0,0 +1,191 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Glue code for POLYVAL using ARMv8 Crypto Extensions
|
||||
*
|
||||
* Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen <mh1@iki.fi>
|
||||
* Copyright (c) 2009 Intel Corp.
|
||||
* Author: Huang Ying <ying.huang@intel.com>
|
||||
* Copyright 2021 Google LLC
|
||||
*/
|
||||
|
||||
/*
|
||||
* Glue code based on ghash-clmulni-intel_glue.c.
|
||||
*
|
||||
* This implementation of POLYVAL uses montgomery multiplication accelerated by
|
||||
* ARMv8 Crypto Extensions instructions to implement the finite field operations.
|
||||
*/
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/polyval.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/cpufeature.h>
|
||||
#include <asm/neon.h>
|
||||
#include <asm/simd.h>
|
||||
|
||||
#define NUM_KEY_POWERS 8
|
||||
|
||||
struct polyval_tfm_ctx {
|
||||
/*
|
||||
* These powers must be in the order h^8, ..., h^1.
|
||||
*/
|
||||
u8 key_powers[NUM_KEY_POWERS][POLYVAL_BLOCK_SIZE];
|
||||
};
|
||||
|
||||
struct polyval_desc_ctx {
|
||||
u8 buffer[POLYVAL_BLOCK_SIZE];
|
||||
u32 bytes;
|
||||
};
|
||||
|
||||
asmlinkage void pmull_polyval_update(const struct polyval_tfm_ctx *keys,
|
||||
const u8 *in, size_t nblocks, u8 *accumulator);
|
||||
asmlinkage void pmull_polyval_mul(u8 *op1, const u8 *op2);
|
||||
|
||||
static void internal_polyval_update(const struct polyval_tfm_ctx *keys,
|
||||
const u8 *in, size_t nblocks, u8 *accumulator)
|
||||
{
|
||||
if (likely(crypto_simd_usable())) {
|
||||
kernel_neon_begin();
|
||||
pmull_polyval_update(keys, in, nblocks, accumulator);
|
||||
kernel_neon_end();
|
||||
} else {
|
||||
polyval_update_non4k(keys->key_powers[NUM_KEY_POWERS-1], in,
|
||||
nblocks, accumulator);
|
||||
}
|
||||
}
|
||||
|
||||
static void internal_polyval_mul(u8 *op1, const u8 *op2)
|
||||
{
|
||||
if (likely(crypto_simd_usable())) {
|
||||
kernel_neon_begin();
|
||||
pmull_polyval_mul(op1, op2);
|
||||
kernel_neon_end();
|
||||
} else {
|
||||
polyval_mul_non4k(op1, op2);
|
||||
}
|
||||
}
|
||||
|
||||
static int polyval_arm64_setkey(struct crypto_shash *tfm,
|
||||
const u8 *key, unsigned int keylen)
|
||||
{
|
||||
struct polyval_tfm_ctx *tctx = crypto_shash_ctx(tfm);
|
||||
int i;
|
||||
|
||||
if (keylen != POLYVAL_BLOCK_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
memcpy(tctx->key_powers[NUM_KEY_POWERS-1], key, POLYVAL_BLOCK_SIZE);
|
||||
|
||||
for (i = NUM_KEY_POWERS-2; i >= 0; i--) {
|
||||
memcpy(tctx->key_powers[i], key, POLYVAL_BLOCK_SIZE);
|
||||
internal_polyval_mul(tctx->key_powers[i],
|
||||
tctx->key_powers[i+1]);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int polyval_arm64_init(struct shash_desc *desc)
|
||||
{
|
||||
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
|
||||
memset(dctx, 0, sizeof(*dctx));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int polyval_arm64_update(struct shash_desc *desc,
|
||||
const u8 *src, unsigned int srclen)
|
||||
{
|
||||
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
|
||||
u8 *pos;
|
||||
unsigned int nblocks;
|
||||
unsigned int n;
|
||||
|
||||
if (dctx->bytes) {
|
||||
n = min(srclen, dctx->bytes);
|
||||
pos = dctx->buffer + POLYVAL_BLOCK_SIZE - dctx->bytes;
|
||||
|
||||
dctx->bytes -= n;
|
||||
srclen -= n;
|
||||
|
||||
while (n--)
|
||||
*pos++ ^= *src++;
|
||||
|
||||
if (!dctx->bytes)
|
||||
internal_polyval_mul(dctx->buffer,
|
||||
tctx->key_powers[NUM_KEY_POWERS-1]);
|
||||
}
|
||||
|
||||
while (srclen >= POLYVAL_BLOCK_SIZE) {
|
||||
/* allow rescheduling every 4K bytes */
|
||||
nblocks = min(srclen, 4096U) / POLYVAL_BLOCK_SIZE;
|
||||
internal_polyval_update(tctx, src, nblocks, dctx->buffer);
|
||||
srclen -= nblocks * POLYVAL_BLOCK_SIZE;
|
||||
src += nblocks * POLYVAL_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
if (srclen) {
|
||||
dctx->bytes = POLYVAL_BLOCK_SIZE - srclen;
|
||||
pos = dctx->buffer;
|
||||
while (srclen--)
|
||||
*pos++ ^= *src++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int polyval_arm64_final(struct shash_desc *desc, u8 *dst)
|
||||
{
|
||||
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
|
||||
|
||||
if (dctx->bytes) {
|
||||
internal_polyval_mul(dctx->buffer,
|
||||
tctx->key_powers[NUM_KEY_POWERS-1]);
|
||||
}
|
||||
|
||||
memcpy(dst, dctx->buffer, POLYVAL_BLOCK_SIZE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct shash_alg polyval_alg = {
|
||||
.digestsize = POLYVAL_DIGEST_SIZE,
|
||||
.init = polyval_arm64_init,
|
||||
.update = polyval_arm64_update,
|
||||
.final = polyval_arm64_final,
|
||||
.setkey = polyval_arm64_setkey,
|
||||
.descsize = sizeof(struct polyval_desc_ctx),
|
||||
.base = {
|
||||
.cra_name = "polyval",
|
||||
.cra_driver_name = "polyval-ce",
|
||||
.cra_priority = 200,
|
||||
.cra_blocksize = POLYVAL_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct polyval_tfm_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
};
|
||||
|
||||
static int __init polyval_ce_mod_init(void)
|
||||
{
|
||||
return crypto_register_shash(&polyval_alg);
|
||||
}
|
||||
|
||||
static void __exit polyval_ce_mod_exit(void)
|
||||
{
|
||||
crypto_unregister_shash(&polyval_alg);
|
||||
}
|
||||
|
||||
module_cpu_feature_match(PMULL, polyval_ce_mod_init)
|
||||
module_exit(polyval_ce_mod_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("POLYVAL hash function accelerated by ARMv8 Crypto Extensions");
|
||||
MODULE_ALIAS_CRYPTO("polyval");
|
||||
MODULE_ALIAS_CRYPTO("polyval-ce");
|
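A hypothetical in-kernel consumer of the "polyval" shash registered by this file would go through the usual crypto_shash API; a minimal sketch (error handling abbreviated, the helper name is invented for illustration):

#include <crypto/hash.h>
#include <linux/err.h>

/*
 * Sketch: compute a POLYVAL digest (16-byte key, 16-byte digest) over a
 * buffer using whichever "polyval" implementation has the highest priority
 * (polyval-ce on arm64 with PMULL, the generic one otherwise).
 */
static int polyval_digest_example(const u8 key[16], const u8 *data,
				  unsigned int len, u8 out[16])
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("polyval", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_shash_setkey(tfm, key, 16);
	if (!err) {
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		err = crypto_shash_digest(desc, data, len, out);
	}
	crypto_free_shash(tfm);
	return err;
}

In practice the main intended user is the hctr2 template, which builds hctr2(aes) on top of xctr(aes) and polyval.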
@@ -61,14 +61,15 @@ sha256-ssse3-$(CONFIG_AS_SHA256_NI) += sha256_ni_asm.o
obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o

obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o
blake2s-x86_64-y := blake2s-shash.o
obj-$(if $(CONFIG_CRYPTO_BLAKE2S_X86),y) += libblake2s-x86_64.o
obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += libblake2s-x86_64.o
libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o

obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o

obj-$(CONFIG_CRYPTO_POLYVAL_CLMUL_NI) += polyval-clmulni.o
polyval-clmulni-y := polyval-clmulni_asm.o polyval-clmulni_glue.o

obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
crc32c-intel-y := crc32c-intel_glue.o
crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
@ -23,6 +23,11 @@
|
||||
|
||||
#define VMOVDQ vmovdqu
|
||||
|
||||
/*
|
||||
* Note: the "x" prefix in these aliases means "this is an xmm register". The
|
||||
* alias prefixes have no relation to XCTR where the "X" prefix means "XOR
|
||||
* counter".
|
||||
*/
|
||||
#define xdata0 %xmm0
|
||||
#define xdata1 %xmm1
|
||||
#define xdata2 %xmm2
|
||||
@ -31,8 +36,10 @@
|
||||
#define xdata5 %xmm5
|
||||
#define xdata6 %xmm6
|
||||
#define xdata7 %xmm7
|
||||
#define xcounter %xmm8
|
||||
#define xbyteswap %xmm9
|
||||
#define xcounter %xmm8 // CTR mode only
|
||||
#define xiv %xmm8 // XCTR mode only
|
||||
#define xbyteswap %xmm9 // CTR mode only
|
||||
#define xtmp %xmm9 // XCTR mode only
|
||||
#define xkey0 %xmm10
|
||||
#define xkey4 %xmm11
|
||||
#define xkey8 %xmm12
|
||||
@ -45,7 +52,7 @@
|
||||
#define p_keys %rdx
|
||||
#define p_out %rcx
|
||||
#define num_bytes %r8
|
||||
|
||||
#define counter %r9 // XCTR mode only
|
||||
#define tmp %r10
|
||||
#define DDQ_DATA 0
|
||||
#define XDATA 1
|
||||
@ -102,7 +109,7 @@ ddq_add_8:
|
||||
* do_aes num_in_par load_keys key_len
|
||||
* This increments p_in, but not p_out
|
||||
*/
|
||||
.macro do_aes b, k, key_len
|
||||
.macro do_aes b, k, key_len, xctr
|
||||
.set by, \b
|
||||
.set load_keys, \k
|
||||
.set klen, \key_len
|
||||
@ -111,29 +118,48 @@ ddq_add_8:
|
||||
vmovdqa 0*16(p_keys), xkey0
|
||||
.endif
|
||||
|
||||
vpshufb xbyteswap, xcounter, xdata0
|
||||
|
||||
.set i, 1
|
||||
.rept (by - 1)
|
||||
club XDATA, i
|
||||
vpaddq (ddq_add_1 + 16 * (i - 1))(%rip), xcounter, var_xdata
|
||||
vptest ddq_low_msk(%rip), var_xdata
|
||||
jnz 1f
|
||||
vpaddq ddq_high_add_1(%rip), var_xdata, var_xdata
|
||||
vpaddq ddq_high_add_1(%rip), xcounter, xcounter
|
||||
1:
|
||||
vpshufb xbyteswap, var_xdata, var_xdata
|
||||
.set i, (i +1)
|
||||
.endr
|
||||
.if \xctr
|
||||
movq counter, xtmp
|
||||
.set i, 0
|
||||
.rept (by)
|
||||
club XDATA, i
|
||||
vpaddq (ddq_add_1 + 16 * i)(%rip), xtmp, var_xdata
|
||||
.set i, (i +1)
|
||||
.endr
|
||||
.set i, 0
|
||||
.rept (by)
|
||||
club XDATA, i
|
||||
vpxor xiv, var_xdata, var_xdata
|
||||
.set i, (i +1)
|
||||
.endr
|
||||
.else
|
||||
vpshufb xbyteswap, xcounter, xdata0
|
||||
.set i, 1
|
||||
.rept (by - 1)
|
||||
club XDATA, i
|
||||
vpaddq (ddq_add_1 + 16 * (i - 1))(%rip), xcounter, var_xdata
|
||||
vptest ddq_low_msk(%rip), var_xdata
|
||||
jnz 1f
|
||||
vpaddq ddq_high_add_1(%rip), var_xdata, var_xdata
|
||||
vpaddq ddq_high_add_1(%rip), xcounter, xcounter
|
||||
1:
|
||||
vpshufb xbyteswap, var_xdata, var_xdata
|
||||
.set i, (i +1)
|
||||
.endr
|
||||
.endif
|
||||
|
||||
vmovdqa 1*16(p_keys), xkeyA
|
||||
|
||||
vpxor xkey0, xdata0, xdata0
|
||||
vpaddq (ddq_add_1 + 16 * (by - 1))(%rip), xcounter, xcounter
|
||||
vptest ddq_low_msk(%rip), xcounter
|
||||
jnz 1f
|
||||
vpaddq ddq_high_add_1(%rip), xcounter, xcounter
|
||||
1:
|
||||
.if \xctr
|
||||
add $by, counter
|
||||
.else
|
||||
vpaddq (ddq_add_1 + 16 * (by - 1))(%rip), xcounter, xcounter
|
||||
vptest ddq_low_msk(%rip), xcounter
|
||||
jnz 1f
|
||||
vpaddq ddq_high_add_1(%rip), xcounter, xcounter
|
||||
1:
|
||||
.endif
|
||||
|
||||
.set i, 1
|
||||
.rept (by - 1)
|
||||
@ -371,94 +397,99 @@ ddq_add_8:
|
||||
.endr
|
||||
.endm
|
||||
|
||||
.macro do_aes_load val, key_len
|
||||
do_aes \val, 1, \key_len
|
||||
.macro do_aes_load val, key_len, xctr
|
||||
do_aes \val, 1, \key_len, \xctr
|
||||
.endm
|
||||
|
||||
.macro do_aes_noload val, key_len
|
||||
do_aes \val, 0, \key_len
|
||||
.macro do_aes_noload val, key_len, xctr
|
||||
do_aes \val, 0, \key_len, \xctr
|
||||
.endm
|
||||
|
||||
/* main body of aes ctr load */
|
||||
|
||||
.macro do_aes_ctrmain key_len
|
||||
.macro do_aes_ctrmain key_len, xctr
|
||||
cmp $16, num_bytes
|
||||
jb .Ldo_return2\key_len
|
||||
jb .Ldo_return2\xctr\key_len
|
||||
|
||||
vmovdqa byteswap_const(%rip), xbyteswap
|
||||
vmovdqu (p_iv), xcounter
|
||||
vpshufb xbyteswap, xcounter, xcounter
|
||||
.if \xctr
|
||||
shr $4, counter
|
||||
vmovdqu (p_iv), xiv
|
||||
.else
|
||||
vmovdqa byteswap_const(%rip), xbyteswap
|
||||
vmovdqu (p_iv), xcounter
|
||||
vpshufb xbyteswap, xcounter, xcounter
|
||||
.endif
|
||||
|
||||
mov num_bytes, tmp
|
||||
and $(7*16), tmp
|
||||
jz .Lmult_of_8_blks\key_len
|
||||
jz .Lmult_of_8_blks\xctr\key_len
|
||||
|
||||
/* 1 <= tmp <= 7 */
|
||||
cmp $(4*16), tmp
|
||||
jg .Lgt4\key_len
|
||||
je .Leq4\key_len
|
||||
jg .Lgt4\xctr\key_len
|
||||
je .Leq4\xctr\key_len
|
||||
|
||||
.Llt4\key_len:
|
||||
.Llt4\xctr\key_len:
|
||||
cmp $(2*16), tmp
|
||||
jg .Leq3\key_len
|
||||
je .Leq2\key_len
|
||||
jg .Leq3\xctr\key_len
|
||||
je .Leq2\xctr\key_len
|
||||
|
||||
.Leq1\key_len:
|
||||
do_aes_load 1, \key_len
|
||||
.Leq1\xctr\key_len:
|
||||
do_aes_load 1, \key_len, \xctr
|
||||
add $(1*16), p_out
|
||||
and $(~7*16), num_bytes
|
||||
jz .Ldo_return2\key_len
|
||||
jmp .Lmain_loop2\key_len
|
||||
jz .Ldo_return2\xctr\key_len
|
||||
jmp .Lmain_loop2\xctr\key_len
|
||||
|
||||
.Leq2\key_len:
|
||||
do_aes_load 2, \key_len
|
||||
.Leq2\xctr\key_len:
|
||||
do_aes_load 2, \key_len, \xctr
|
||||
add $(2*16), p_out
|
||||
and $(~7*16), num_bytes
|
||||
jz .Ldo_return2\key_len
|
||||
jmp .Lmain_loop2\key_len
|
||||
jz .Ldo_return2\xctr\key_len
|
||||
jmp .Lmain_loop2\xctr\key_len
|
||||
|
||||
|
||||
.Leq3\key_len:
|
||||
do_aes_load 3, \key_len
|
||||
.Leq3\xctr\key_len:
|
||||
do_aes_load 3, \key_len, \xctr
|
||||
add $(3*16), p_out
|
||||
and $(~7*16), num_bytes
|
||||
jz .Ldo_return2\key_len
|
||||
jmp .Lmain_loop2\key_len
|
||||
jz .Ldo_return2\xctr\key_len
|
||||
jmp .Lmain_loop2\xctr\key_len
|
||||
|
||||
.Leq4\key_len:
|
||||
do_aes_load 4, \key_len
|
||||
.Leq4\xctr\key_len:
|
||||
do_aes_load 4, \key_len, \xctr
|
||||
add $(4*16), p_out
|
||||
and $(~7*16), num_bytes
|
||||
jz .Ldo_return2\key_len
|
||||
jmp .Lmain_loop2\key_len
|
||||
jz .Ldo_return2\xctr\key_len
|
||||
jmp .Lmain_loop2\xctr\key_len
|
||||
|
||||
.Lgt4\key_len:
|
||||
.Lgt4\xctr\key_len:
|
||||
cmp $(6*16), tmp
|
||||
jg .Leq7\key_len
|
||||
je .Leq6\key_len
|
||||
jg .Leq7\xctr\key_len
|
||||
je .Leq6\xctr\key_len
|
||||
|
||||
.Leq5\key_len:
|
||||
do_aes_load 5, \key_len
|
||||
.Leq5\xctr\key_len:
|
||||
do_aes_load 5, \key_len, \xctr
|
||||
add $(5*16), p_out
|
||||
and $(~7*16), num_bytes
|
||||
jz .Ldo_return2\key_len
|
||||
jmp .Lmain_loop2\key_len
|
||||
jz .Ldo_return2\xctr\key_len
|
||||
jmp .Lmain_loop2\xctr\key_len
|
||||
|
||||
.Leq6\key_len:
|
||||
do_aes_load 6, \key_len
|
||||
.Leq6\xctr\key_len:
|
||||
do_aes_load 6, \key_len, \xctr
|
||||
add $(6*16), p_out
|
||||
and $(~7*16), num_bytes
|
||||
jz .Ldo_return2\key_len
|
||||
jmp .Lmain_loop2\key_len
|
||||
jz .Ldo_return2\xctr\key_len
|
||||
jmp .Lmain_loop2\xctr\key_len
|
||||
|
||||
.Leq7\key_len:
|
||||
do_aes_load 7, \key_len
|
||||
.Leq7\xctr\key_len:
|
||||
do_aes_load 7, \key_len, \xctr
|
||||
add $(7*16), p_out
|
||||
and $(~7*16), num_bytes
|
||||
jz .Ldo_return2\key_len
|
||||
jmp .Lmain_loop2\key_len
|
||||
jz .Ldo_return2\xctr\key_len
|
||||
jmp .Lmain_loop2\xctr\key_len
|
||||
|
||||
.Lmult_of_8_blks\key_len:
|
||||
.Lmult_of_8_blks\xctr\key_len:
|
||||
.if (\key_len != KEY_128)
|
||||
vmovdqa 0*16(p_keys), xkey0
|
||||
vmovdqa 4*16(p_keys), xkey4
|
||||
@ -471,17 +502,19 @@ ddq_add_8:
|
||||
vmovdqa 9*16(p_keys), xkey12
|
||||
.endif
|
||||
.align 16
|
||||
.Lmain_loop2\key_len:
|
||||
.Lmain_loop2\xctr\key_len:
|
||||
/* num_bytes is a multiple of 8 and >0 */
|
||||
do_aes_noload 8, \key_len
|
||||
do_aes_noload 8, \key_len, \xctr
|
||||
add $(8*16), p_out
|
||||
sub $(8*16), num_bytes
|
||||
jne .Lmain_loop2\key_len
|
||||
jne .Lmain_loop2\xctr\key_len
|
||||
|
||||
.Ldo_return2\key_len:
|
||||
/* return updated IV */
|
||||
vpshufb xbyteswap, xcounter, xcounter
|
||||
vmovdqu xcounter, (p_iv)
|
||||
.Ldo_return2\xctr\key_len:
|
||||
.if !\xctr
|
||||
/* return updated IV */
|
||||
vpshufb xbyteswap, xcounter, xcounter
|
||||
vmovdqu xcounter, (p_iv)
|
||||
.endif
|
||||
RET
|
||||
.endm
|
||||
|
||||
@ -494,7 +527,7 @@ ddq_add_8:
|
||||
*/
|
||||
SYM_FUNC_START(aes_ctr_enc_128_avx_by8)
|
||||
/* call the aes main loop */
|
||||
do_aes_ctrmain KEY_128
|
||||
do_aes_ctrmain KEY_128 0
|
||||
|
||||
SYM_FUNC_END(aes_ctr_enc_128_avx_by8)
|
||||
|
||||
@ -507,7 +540,7 @@ SYM_FUNC_END(aes_ctr_enc_128_avx_by8)
|
||||
*/
|
||||
SYM_FUNC_START(aes_ctr_enc_192_avx_by8)
|
||||
/* call the aes main loop */
|
||||
do_aes_ctrmain KEY_192
|
||||
do_aes_ctrmain KEY_192 0
|
||||
|
||||
SYM_FUNC_END(aes_ctr_enc_192_avx_by8)
|
||||
|
||||
@ -520,6 +553,45 @@ SYM_FUNC_END(aes_ctr_enc_192_avx_by8)
|
||||
*/
|
||||
SYM_FUNC_START(aes_ctr_enc_256_avx_by8)
|
||||
/* call the aes main loop */
|
||||
do_aes_ctrmain KEY_256
|
||||
do_aes_ctrmain KEY_256 0
|
||||
|
||||
SYM_FUNC_END(aes_ctr_enc_256_avx_by8)
|
||||
|
||||
/*
|
||||
* routine to do AES128 XCTR enc/decrypt "by8"
|
||||
* XMM registers are clobbered.
|
||||
* Saving/restoring must be done at a higher level
|
||||
* aes_xctr_enc_128_avx_by8(const u8 *in, const u8 *iv, const void *keys,
|
||||
* u8* out, unsigned int num_bytes, unsigned int byte_ctr)
|
||||
*/
|
||||
SYM_FUNC_START(aes_xctr_enc_128_avx_by8)
|
||||
/* call the aes main loop */
|
||||
do_aes_ctrmain KEY_128 1
|
||||
|
||||
SYM_FUNC_END(aes_xctr_enc_128_avx_by8)
|
||||
|
||||
/*
|
||||
* routine to do AES192 XCTR enc/decrypt "by8"
|
||||
* XMM registers are clobbered.
|
||||
* Saving/restoring must be done at a higher level
|
||||
* aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv, const void *keys,
|
||||
* u8* out, unsigned int num_bytes, unsigned int byte_ctr)
|
||||
*/
|
||||
SYM_FUNC_START(aes_xctr_enc_192_avx_by8)
|
||||
/* call the aes main loop */
|
||||
do_aes_ctrmain KEY_192 1
|
||||
|
||||
SYM_FUNC_END(aes_xctr_enc_192_avx_by8)
|
||||
|
||||
/*
|
||||
* routine to do AES256 XCTR enc/decrypt "by8"
|
||||
* XMM registers are clobbered.
|
||||
* Saving/restoring must be done at a higher level
|
||||
* aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv, const void *keys,
|
||||
* u8* out, unsigned int num_bytes, unsigned int byte_ctr)
|
||||
*/
|
||||
SYM_FUNC_START(aes_xctr_enc_256_avx_by8)
|
||||
/* call the aes main loop */
|
||||
do_aes_ctrmain KEY_256 1
|
||||
|
||||
SYM_FUNC_END(aes_xctr_enc_256_avx_by8)
|
||||
|
@ -135,6 +135,20 @@ asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv,
|
||||
void *keys, u8 *out, unsigned int num_bytes);
|
||||
asmlinkage void aes_ctr_enc_256_avx_by8(const u8 *in, u8 *iv,
|
||||
void *keys, u8 *out, unsigned int num_bytes);
|
||||
|
||||
|
||||
asmlinkage void aes_xctr_enc_128_avx_by8(const u8 *in, const u8 *iv,
|
||||
const void *keys, u8 *out, unsigned int num_bytes,
|
||||
unsigned int byte_ctr);
|
||||
|
||||
asmlinkage void aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv,
|
||||
const void *keys, u8 *out, unsigned int num_bytes,
|
||||
unsigned int byte_ctr);
|
||||
|
||||
asmlinkage void aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv,
|
||||
const void *keys, u8 *out, unsigned int num_bytes,
|
||||
unsigned int byte_ctr);
|
||||
|
||||
/*
|
||||
* asmlinkage void aesni_gcm_init_avx_gen2()
|
||||
* gcm_data *my_ctx_data, context data
|
||||
@ -527,6 +541,59 @@ static int ctr_crypt(struct skcipher_request *req)
|
||||
return err;
|
||||
}
|
||||
|
||||
static void aesni_xctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out,
|
||||
const u8 *in, unsigned int len, u8 *iv,
|
||||
unsigned int byte_ctr)
|
||||
{
|
||||
if (ctx->key_length == AES_KEYSIZE_128)
|
||||
aes_xctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len,
|
||||
byte_ctr);
|
||||
else if (ctx->key_length == AES_KEYSIZE_192)
|
||||
aes_xctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len,
|
||||
byte_ctr);
|
||||
else
|
||||
aes_xctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len,
|
||||
byte_ctr);
|
||||
}
|
||||
|
||||
static int xctr_crypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
|
||||
u8 keystream[AES_BLOCK_SIZE];
|
||||
struct skcipher_walk walk;
|
||||
unsigned int nbytes;
|
||||
unsigned int byte_ctr = 0;
|
||||
int err;
|
||||
__le32 block[AES_BLOCK_SIZE / sizeof(__le32)];
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
while ((nbytes = walk.nbytes) > 0) {
|
||||
kernel_fpu_begin();
|
||||
if (nbytes & AES_BLOCK_MASK)
|
||||
aesni_xctr_enc_avx_tfm(ctx, walk.dst.virt.addr,
|
||||
walk.src.virt.addr, nbytes & AES_BLOCK_MASK,
|
||||
walk.iv, byte_ctr);
|
||||
nbytes &= ~AES_BLOCK_MASK;
|
||||
byte_ctr += walk.nbytes - nbytes;
|
||||
|
||||
if (walk.nbytes == walk.total && nbytes > 0) {
|
||||
memcpy(block, walk.iv, AES_BLOCK_SIZE);
|
||||
block[0] ^= cpu_to_le32(1 + byte_ctr / AES_BLOCK_SIZE);
|
||||
aesni_enc(ctx, keystream, (u8 *)block);
|
||||
crypto_xor_cpy(walk.dst.virt.addr + walk.nbytes -
|
||||
nbytes, walk.src.virt.addr + walk.nbytes
|
||||
- nbytes, keystream, nbytes);
|
||||
byte_ctr += nbytes;
|
||||
nbytes = 0;
|
||||
}
|
||||
kernel_fpu_end();
|
||||
err = skcipher_walk_done(&walk, nbytes);
|
||||
}
|
||||
return err;
|
||||
}
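/*
 * Illustrative sketch (not part of the patch): how a single XCTR keystream
 * block is derived, mirroring the partial-block path in xctr_crypt() above.
 * Unlike CTR, the counter is XORed into the IV as a little-endian value
 * rather than added big-endian. encrypt_block() is a placeholder for any
 * AES single-block encryption.
 */
static void xctr_keystream_block(const void *key_ctx, const u8 iv[16],
				 u32 block_index, u8 keystream[16],
				 void (*encrypt_block)(const void *ctx,
						       u8 *dst, const u8 *src))
{
	__le32 ctr_block[4];

	memcpy(ctr_block, iv, sizeof(ctr_block));
	/* XCTR numbers blocks starting from 1, hence the "+ 1". */
	ctr_block[0] ^= cpu_to_le32(block_index + 1);
	encrypt_block(key_ctx, keystream, (const u8 *)ctr_block);
}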
|
||||
|
||||
static int
|
||||
rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
|
||||
{
|
||||
@ -1050,6 +1117,33 @@ static struct skcipher_alg aesni_skciphers[] = {
|
||||
static
|
||||
struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)];
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* XCTR does not have a non-AVX implementation, so it must be enabled
|
||||
* conditionally.
|
||||
*/
|
||||
static struct skcipher_alg aesni_xctr = {
|
||||
.base = {
|
||||
.cra_name = "__xctr(aes)",
|
||||
.cra_driver_name = "__xctr-aes-aesni",
|
||||
.cra_priority = 400,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = CRYPTO_AES_CTX_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.chunksize = AES_BLOCK_SIZE,
|
||||
.setkey = aesni_skcipher_setkey,
|
||||
.encrypt = xctr_crypt,
|
||||
.decrypt = xctr_crypt,
|
||||
};
|
||||
|
||||
static struct simd_skcipher_alg *aesni_simd_xctr;
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key,
|
||||
unsigned int key_len)
|
||||
@ -1163,7 +1257,7 @@ static int __init aesni_init(void)
|
||||
static_call_update(aesni_ctr_enc_tfm, aesni_ctr_enc_avx_tfm);
|
||||
pr_info("AES CTR mode by8 optimization enabled\n");
|
||||
}
|
||||
#endif
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
err = crypto_register_alg(&aesni_cipher_alg);
|
||||
if (err)
|
||||
@ -1180,8 +1274,22 @@ static int __init aesni_init(void)
|
||||
if (err)
|
||||
goto unregister_skciphers;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
if (boot_cpu_has(X86_FEATURE_AVX))
|
||||
err = simd_register_skciphers_compat(&aesni_xctr, 1,
|
||||
&aesni_simd_xctr);
|
||||
if (err)
|
||||
goto unregister_aeads;
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
return 0;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
unregister_aeads:
|
||||
simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads),
|
||||
aesni_simd_aeads);
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
unregister_skciphers:
|
||||
simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
|
||||
aesni_simd_skciphers);
|
||||
@ -1197,6 +1305,10 @@ static void __exit aesni_exit(void)
|
||||
simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
|
||||
aesni_simd_skciphers);
|
||||
crypto_unregister_alg(&aesni_cipher_alg);
|
||||
#ifdef CONFIG_X86_64
|
||||
if (boot_cpu_has(X86_FEATURE_AVX))
|
||||
simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr);
|
||||
#endif /* CONFIG_X86_64 */
|
||||
}
|
||||
|
||||
late_initcall(aesni_init);
|
||||
|
@ -4,7 +4,6 @@
|
||||
*/
|
||||
|
||||
#include <crypto/internal/blake2s.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/jump_label.h>
|
||||
@ -33,7 +32,7 @@ void blake2s_compress(struct blake2s_state *state, const u8 *block,
|
||||
/* SIMD disables preemption, so relax after processing each page. */
|
||||
BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);
|
||||
|
||||
if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
|
||||
if (!static_branch_likely(&blake2s_use_ssse3) || !may_use_simd()) {
|
||||
blake2s_compress_generic(state, block, nblocks, inc);
|
||||
return;
|
||||
}
|
||||
|
@ -1,77 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#include <crypto/internal/blake2s.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/sizes.h>
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/processor.h>
|
||||
|
||||
static int crypto_blake2s_update_x86(struct shash_desc *desc,
|
||||
const u8 *in, unsigned int inlen)
|
||||
{
|
||||
return crypto_blake2s_update(desc, in, inlen, false);
|
||||
}
|
||||
|
||||
static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
return crypto_blake2s_final(desc, out, false);
|
||||
}
|
||||
|
||||
#define BLAKE2S_ALG(name, driver_name, digest_size) \
|
||||
{ \
|
||||
.base.cra_name = name, \
|
||||
.base.cra_driver_name = driver_name, \
|
||||
.base.cra_priority = 200, \
|
||||
.base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \
|
||||
.base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \
|
||||
.base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \
|
||||
.base.cra_module = THIS_MODULE, \
|
||||
.digestsize = digest_size, \
|
||||
.setkey = crypto_blake2s_setkey, \
|
||||
.init = crypto_blake2s_init, \
|
||||
.update = crypto_blake2s_update_x86, \
|
||||
.final = crypto_blake2s_final_x86, \
|
||||
.descsize = sizeof(struct blake2s_state), \
|
||||
}
|
||||
|
||||
static struct shash_alg blake2s_algs[] = {
|
||||
BLAKE2S_ALG("blake2s-128", "blake2s-128-x86", BLAKE2S_128_HASH_SIZE),
|
||||
BLAKE2S_ALG("blake2s-160", "blake2s-160-x86", BLAKE2S_160_HASH_SIZE),
|
||||
BLAKE2S_ALG("blake2s-224", "blake2s-224-x86", BLAKE2S_224_HASH_SIZE),
|
||||
BLAKE2S_ALG("blake2s-256", "blake2s-256-x86", BLAKE2S_256_HASH_SIZE),
|
||||
};
|
||||
|
||||
static int __init blake2s_mod_init(void)
|
||||
{
|
||||
if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3))
|
||||
return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit blake2s_mod_exit(void)
|
||||
{
|
||||
if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3))
|
||||
crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
|
||||
}
|
||||
|
||||
module_init(blake2s_mod_init);
|
||||
module_exit(blake2s_mod_exit);
|
||||
|
||||
MODULE_ALIAS_CRYPTO("blake2s-128");
|
||||
MODULE_ALIAS_CRYPTO("blake2s-128-x86");
|
||||
MODULE_ALIAS_CRYPTO("blake2s-160");
|
||||
MODULE_ALIAS_CRYPTO("blake2s-160-x86");
|
||||
MODULE_ALIAS_CRYPTO("blake2s-224");
|
||||
MODULE_ALIAS_CRYPTO("blake2s-224-x86");
|
||||
MODULE_ALIAS_CRYPTO("blake2s-256");
|
||||
MODULE_ALIAS_CRYPTO("blake2s-256-x86");
|
||||
MODULE_LICENSE("GPL v2");
|
arch/x86/crypto/polyval-clmulni_asm.S (new file, 321 lines)
@ -0,0 +1,321 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Copyright 2021 Google LLC
|
||||
*/
|
||||
/*
|
||||
* This is an efficient implementation of POLYVAL using intel PCLMULQDQ-NI
|
||||
* instructions. It works on 8 blocks at a time, by precomputing the first 8
|
||||
* keys powers h^8, ..., h^1 in the POLYVAL finite field. This precomputation
|
||||
* allows us to split finite field multiplication into two steps.
|
||||
*
|
||||
* In the first step, we consider h^i, m_i as normal polynomials of degree less
|
||||
* than 128. We then compute p(x) = h^8m_0 + ... + h^1m_7 where multiplication
|
||||
* is simply polynomial multiplication.
|
||||
*
|
||||
* In the second step, we compute the reduction of p(x) modulo the finite field
|
||||
* modulus g(x) = x^128 + x^127 + x^126 + x^121 + 1.
|
||||
*
|
||||
* This two step process is equivalent to computing h^8m_0 + ... + h^1m_7 where
|
||||
* multiplication is finite field multiplication. The advantage is that the
|
||||
* two-step process only requires 1 finite field reduction for every 8
|
||||
* polynomial multiplications. Further parallelism is gained by interleaving the
|
||||
* multiplications and polynomial reductions.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
|
||||
#define STRIDE_BLOCKS 8
|
||||
|
||||
#define GSTAR %xmm7
|
||||
#define PL %xmm8
|
||||
#define PH %xmm9
|
||||
#define TMP_XMM %xmm11
|
||||
#define LO %xmm12
|
||||
#define HI %xmm13
|
||||
#define MI %xmm14
|
||||
#define SUM %xmm15
|
||||
|
||||
#define KEY_POWERS %rdi
|
||||
#define MSG %rsi
|
||||
#define BLOCKS_LEFT %rdx
|
||||
#define ACCUMULATOR %rcx
|
||||
#define TMP %rax
|
||||
|
||||
.section .rodata.cst16.gstar, "aM", @progbits, 16
|
||||
.align 16
|
||||
|
||||
.Lgstar:
|
||||
.quad 0xc200000000000000, 0xc200000000000000
|
||||
|
||||
.text
|
||||
|
||||
/*
|
||||
* Performs schoolbook1_iteration on two lists of 128-bit polynomials of length
|
||||
* count pointed to by MSG and KEY_POWERS.
|
||||
*/
|
||||
.macro schoolbook1 count
|
||||
.set i, 0
|
||||
.rept (\count)
|
||||
schoolbook1_iteration i 0
|
||||
.set i, (i + 1)
|
||||
.endr
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Computes the product of two 128-bit polynomials at the memory locations
|
||||
* specified by (MSG + 16*i) and (KEY_POWERS + 16*i) and XORs the components of
|
||||
* the 256-bit product into LO, MI, HI.
|
||||
*
|
||||
* Given:
|
||||
* X = [X_1 : X_0]
|
||||
* Y = [Y_1 : Y_0]
|
||||
*
|
||||
* We compute:
|
||||
* LO += X_0 * Y_0
|
||||
* MI += X_0 * Y_1 + X_1 * Y_0
|
||||
* HI += X_1 * Y_1
|
||||
*
|
||||
* Later, the 256-bit result can be extracted as:
|
||||
* [HI_1 : HI_0 + MI_1 : LO_1 + MI_0 : LO_0]
|
||||
* This step is done when computing the polynomial reduction for efficiency
|
||||
* reasons.
|
||||
*
|
||||
* If xor_sum == 1, then also XOR the value of SUM into m_0. This avoids an
|
||||
* extra multiplication of SUM and h^8.
|
||||
*/
|
||||
.macro schoolbook1_iteration i xor_sum
|
||||
movups (16*\i)(MSG), %xmm0
|
||||
.if (\i == 0 && \xor_sum == 1)
|
||||
pxor SUM, %xmm0
|
||||
.endif
|
||||
vpclmulqdq $0x01, (16*\i)(KEY_POWERS), %xmm0, %xmm2
|
||||
vpclmulqdq $0x00, (16*\i)(KEY_POWERS), %xmm0, %xmm1
|
||||
vpclmulqdq $0x10, (16*\i)(KEY_POWERS), %xmm0, %xmm3
|
||||
vpclmulqdq $0x11, (16*\i)(KEY_POWERS), %xmm0, %xmm4
|
||||
vpxor %xmm2, MI, MI
|
||||
vpxor %xmm1, LO, LO
|
||||
vpxor %xmm4, HI, HI
|
||||
vpxor %xmm3, MI, MI
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Performs the same computation as schoolbook1_iteration, except we expect the
|
||||
* arguments to already be loaded into xmm0 and xmm1 and we set the result
|
||||
* registers LO, MI, and HI directly rather than XOR'ing into them.
|
||||
*/
|
||||
.macro schoolbook1_noload
|
||||
vpclmulqdq $0x01, %xmm0, %xmm1, MI
|
||||
vpclmulqdq $0x10, %xmm0, %xmm1, %xmm2
|
||||
vpclmulqdq $0x00, %xmm0, %xmm1, LO
|
||||
vpclmulqdq $0x11, %xmm0, %xmm1, HI
|
||||
vpxor %xmm2, MI, MI
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Computes the 256-bit polynomial represented by LO, HI, MI. Stores
|
||||
* the result in PL, PH.
|
||||
* [PH : PL] = [HI_1 : HI_0 + MI_1 : LO_1 + MI_0 : LO_0]
|
||||
*/
|
||||
.macro schoolbook2
|
||||
vpslldq $8, MI, PL
|
||||
vpsrldq $8, MI, PH
|
||||
pxor LO, PL
|
||||
pxor HI, PH
|
||||
.endm
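/*
 * Illustrative C sketch (not part of the patch): what schoolbook1_iteration
 * and schoolbook2 compute, using the LO/MI/HI decomposition described in the
 * comments above. clmul64() is a hypothetical portable carry-less
 * 64x64 -> 128-bit multiply standing in for PCLMULQDQ.
 */
struct u128 { u64 lo, hi; };

static struct u128 clmul64(u64 a, u64 b)
{
	struct u128 r = { 0, 0 };
	int i;

	for (i = 0; i < 64; i++) {
		if ((b >> i) & 1) {
			r.lo ^= a << i;
			r.hi ^= i ? a >> (64 - i) : 0;
		}
	}
	return r;
}

/* 128x128 -> 256-bit carry-less product, combined into PH:PL as in schoolbook2. */
static void schoolbook_mul(const u64 x[2], const u64 y[2], u64 pl[2], u64 ph[2])
{
	struct u128 lo = clmul64(x[0], y[0]);	/* X_0 * Y_0 */
	struct u128 hi = clmul64(x[1], y[1]);	/* X_1 * Y_1 */
	struct u128 a  = clmul64(x[0], y[1]);	/* X_0 * Y_1 */
	struct u128 b  = clmul64(x[1], y[0]);	/* X_1 * Y_0 */
	u64 mi_lo = a.lo ^ b.lo;		/* MI_0 */
	u64 mi_hi = a.hi ^ b.hi;		/* MI_1 */

	pl[0] = lo.lo;				/* LO_0 */
	pl[1] = lo.hi ^ mi_lo;			/* LO_1 + MI_0 */
	ph[0] = hi.lo ^ mi_hi;			/* HI_0 + MI_1 */
	ph[1] = hi.hi;				/* HI_1 */
}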
|
||||
|
||||
/*
|
||||
* Computes the 128-bit reduction of PH : PL. Stores the result in dest.
|
||||
*
|
||||
* This macro computes p(x) mod g(x) where p(x) is in montgomery form and g(x) =
|
||||
* x^128 + x^127 + x^126 + x^121 + 1.
|
||||
*
|
||||
* We have a 256-bit polynomial PH : PL = P_3 : P_2 : P_1 : P_0 that is the
|
||||
* product of two 128-bit polynomials in Montgomery form. We need to reduce it
|
||||
* mod g(x). Also, since polynomials in Montgomery form have an "extra" factor
|
||||
* of x^128, this product has two extra factors of x^128. To get it back into
|
||||
* Montgomery form, we need to remove one of these factors by dividing by x^128.
|
||||
*
|
||||
* To accomplish both of these goals, we add multiples of g(x) that cancel out
|
||||
* the low 128 bits P_1 : P_0, leaving just the high 128 bits. Since the low
|
||||
* bits are zero, the polynomial division by x^128 can be done by right shifting.
|
||||
*
|
||||
* Since the only nonzero term in the low 64 bits of g(x) is the constant term,
|
||||
* the multiple of g(x) needed to cancel out P_0 is P_0 * g(x). The CPU can
|
||||
* only do 64x64 bit multiplications, so split P_0 * g(x) into x^128 * P_0 +
|
||||
* x^64 * g*(x) * P_0 + P_0, where g*(x) is bits 64-127 of g(x). Adding this to
|
||||
* the original polynomial gives P_3 : P_2 + P_0 + T_1 : P_1 + T_0 : 0, where T
|
||||
* = T_1 : T_0 = g*(x) * P_0. Thus, bits 0-63 got "folded" into bits 64-191.
|
||||
*
|
||||
* Repeating this same process on the next 64 bits "folds" bits 64-127 into bits
|
||||
* 128-255, giving the answer in bits 128-255. This time, we need to cancel P_1
|
||||
* + T_0 in bits 64-127. The multiple of g(x) required is (P_1 + T_0) * g(x) *
|
||||
* x^64. Adding this to our previous computation gives P_3 + P_1 + T_0 + V_1 :
|
||||
* P_2 + P_0 + T_1 + V_0 : 0 : 0, where V = V_1 : V_0 = g*(x) * (P_1 + T_0).
|
||||
*
|
||||
* So our final computation is:
|
||||
* T = T_1 : T_0 = g*(x) * P_0
|
||||
* V = V_1 : V_0 = g*(x) * (P_1 + T_0)
|
||||
* p(x) / x^{128} mod g(x) = P_3 + P_1 + T_0 + V_1 : P_2 + P_0 + T_1 + V_0
|
||||
*
|
||||
* The implementation below saves a XOR instruction by computing P_1 + T_0 : P_0
|
||||
* + T_1 and XORing into dest, rather than separately XORing P_1 : P_0 and T_0 :
|
||||
* T_1 into dest. This allows us to reuse P_1 + T_0 when computing V.
|
||||
*/
|
||||
.macro montgomery_reduction dest
|
||||
vpclmulqdq $0x00, PL, GSTAR, TMP_XMM # TMP_XMM = T_1 : T_0 = P_0 * g*(x)
|
||||
pshufd $0b01001110, TMP_XMM, TMP_XMM # TMP_XMM = T_0 : T_1
|
||||
pxor PL, TMP_XMM # TMP_XMM = P_1 + T_0 : P_0 + T_1
|
||||
pxor TMP_XMM, PH # PH = P_3 + P_1 + T_0 : P_2 + P_0 + T_1
|
||||
pclmulqdq $0x11, GSTAR, TMP_XMM # TMP_XMM = V_1 : V_0 = V = [(P_1 + T_0) * g*(x)]
|
||||
vpxor TMP_XMM, PH, \dest
|
||||
.endm
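/*
 * Illustrative C sketch (not part of the patch) of the reduction formulas
 * given in the comment above, reusing the hypothetical clmul64() helper from
 * the schoolbook sketch. POLYVAL_GSTAR is g*(x), bits 64-127 of g(x), i.e.
 * the constant loaded from .Lgstar.
 */
#define POLYVAL_GSTAR 0xc200000000000000ULL

/* Reduce PH:PL = P_3 : P_2 : P_1 : P_0 modulo g(x), dividing by x^128. */
static void montgomery_reduce(const u64 pl[2], const u64 ph[2], u64 out[2])
{
	struct u128 t = clmul64(POLYVAL_GSTAR, pl[0]);		/* T = g*(x) * P_0 */
	struct u128 v = clmul64(POLYVAL_GSTAR, pl[1] ^ t.lo);	/* V = g*(x) * (P_1 + T_0) */

	out[0] = ph[0] ^ pl[0] ^ t.hi ^ v.lo;	/* P_2 + P_0 + T_1 + V_0 */
	out[1] = ph[1] ^ pl[1] ^ t.lo ^ v.hi;	/* P_3 + P_1 + T_0 + V_1 */
}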
|
||||
|
||||
/*
|
||||
* Compute schoolbook multiplication for 8 blocks
|
||||
* m_0h^8 + ... + m_7h^1
|
||||
*
|
||||
* If reduce is set, also computes the montgomery reduction of the
|
||||
* previous full_stride call and XORs with the first message block.
|
||||
* (m_0 + REDUCE(PL, PH))h^8 + ... + m_7h^1.
|
||||
* I.e., the first multiplication uses m_0 + REDUCE(PL, PH) instead of m_0.
|
||||
*/
|
||||
.macro full_stride reduce
|
||||
pxor LO, LO
|
||||
pxor HI, HI
|
||||
pxor MI, MI
|
||||
|
||||
schoolbook1_iteration 7 0
|
||||
.if \reduce
|
||||
vpclmulqdq $0x00, PL, GSTAR, TMP_XMM
|
||||
.endif
|
||||
|
||||
schoolbook1_iteration 6 0
|
||||
.if \reduce
|
||||
pshufd $0b01001110, TMP_XMM, TMP_XMM
|
||||
.endif
|
||||
|
||||
schoolbook1_iteration 5 0
|
||||
.if \reduce
|
||||
pxor PL, TMP_XMM
|
||||
.endif
|
||||
|
||||
schoolbook1_iteration 4 0
|
||||
.if \reduce
|
||||
pxor TMP_XMM, PH
|
||||
.endif
|
||||
|
||||
schoolbook1_iteration 3 0
|
||||
.if \reduce
|
||||
pclmulqdq $0x11, GSTAR, TMP_XMM
|
||||
.endif
|
||||
|
||||
schoolbook1_iteration 2 0
|
||||
.if \reduce
|
||||
vpxor TMP_XMM, PH, SUM
|
||||
.endif
|
||||
|
||||
schoolbook1_iteration 1 0
|
||||
|
||||
schoolbook1_iteration 0 1
|
||||
|
||||
addq $(8*16), MSG
|
||||
schoolbook2
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Process BLOCKS_LEFT blocks, where 0 < BLOCKS_LEFT < STRIDE_BLOCKS
|
||||
*/
|
||||
.macro partial_stride
|
||||
mov BLOCKS_LEFT, TMP
|
||||
shlq $4, TMP
|
||||
addq $(16*STRIDE_BLOCKS), KEY_POWERS
|
||||
subq TMP, KEY_POWERS
|
||||
|
||||
movups (MSG), %xmm0
|
||||
pxor SUM, %xmm0
|
||||
movaps (KEY_POWERS), %xmm1
|
||||
schoolbook1_noload
|
||||
dec BLOCKS_LEFT
|
||||
addq $16, MSG
|
||||
addq $16, KEY_POWERS
|
||||
|
||||
test $4, BLOCKS_LEFT
|
||||
jz .Lpartial4BlocksDone
|
||||
schoolbook1 4
|
||||
addq $(4*16), MSG
|
||||
addq $(4*16), KEY_POWERS
|
||||
.Lpartial4BlocksDone:
|
||||
test $2, BLOCKS_LEFT
|
||||
jz .Lpartial2BlocksDone
|
||||
schoolbook1 2
|
||||
addq $(2*16), MSG
|
||||
addq $(2*16), KEY_POWERS
|
||||
.Lpartial2BlocksDone:
|
||||
test $1, BLOCKS_LEFT
|
||||
jz .LpartialDone
|
||||
schoolbook1 1
|
||||
.LpartialDone:
|
||||
schoolbook2
|
||||
montgomery_reduction SUM
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Perform montgomery multiplication in GF(2^128) and store result in op1.
|
||||
*
|
||||
* Computes op1*op2*x^{-128} mod x^128 + x^127 + x^126 + x^121 + 1
|
||||
* If op1, op2 are in montgomery form, this computes the montgomery
|
||||
* form of op1*op2.
|
||||
*
|
||||
* void clmul_polyval_mul(u8 *op1, const u8 *op2);
|
||||
*/
|
||||
SYM_FUNC_START(clmul_polyval_mul)
|
||||
FRAME_BEGIN
|
||||
vmovdqa .Lgstar(%rip), GSTAR
|
||||
movups (%rdi), %xmm0
|
||||
movups (%rsi), %xmm1
|
||||
schoolbook1_noload
|
||||
schoolbook2
|
||||
montgomery_reduction SUM
|
||||
movups SUM, (%rdi)
|
||||
FRAME_END
|
||||
RET
|
||||
SYM_FUNC_END(clmul_polyval_mul)
|
||||
|
||||
/*
|
||||
* Perform polynomial evaluation as specified by POLYVAL. This computes:
|
||||
* h^n * accumulator + h^n * m_0 + ... + h^1 * m_{n-1}
|
||||
* where n=nblocks, h is the hash key, and m_i are the message blocks.
|
||||
*
|
||||
* rdi - pointer to precomputed key powers h^8 ... h^1
|
||||
* rsi - pointer to message blocks
|
||||
* rdx - number of blocks to hash
|
||||
* rcx - pointer to the accumulator
|
||||
*
|
||||
* void clmul_polyval_update(const struct polyval_tfm_ctx *keys,
|
||||
* const u8 *in, size_t nblocks, u8 *accumulator);
|
||||
*/
|
||||
SYM_FUNC_START(clmul_polyval_update)
|
||||
FRAME_BEGIN
|
||||
vmovdqa .Lgstar(%rip), GSTAR
|
||||
movups (ACCUMULATOR), SUM
|
||||
subq $STRIDE_BLOCKS, BLOCKS_LEFT
|
||||
js .LstrideLoopExit
|
||||
full_stride 0
|
||||
subq $STRIDE_BLOCKS, BLOCKS_LEFT
|
||||
js .LstrideLoopExitReduce
|
||||
.LstrideLoop:
|
||||
full_stride 1
|
||||
subq $STRIDE_BLOCKS, BLOCKS_LEFT
|
||||
jns .LstrideLoop
|
||||
.LstrideLoopExitReduce:
|
||||
montgomery_reduction SUM
|
||||
.LstrideLoopExit:
|
||||
add $STRIDE_BLOCKS, BLOCKS_LEFT
|
||||
jz .LskipPartial
|
||||
partial_stride
|
||||
.LskipPartial:
|
||||
movups SUM, (ACCUMULATOR)
|
||||
FRAME_END
|
||||
RET
|
||||
SYM_FUNC_END(clmul_polyval_update)
|
arch/x86/crypto/polyval-clmulni_glue.c (new file, 203 lines)
@ -0,0 +1,203 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Glue code for POLYVAL using PCMULQDQ-NI
|
||||
*
|
||||
* Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen <mh1@iki.fi>
|
||||
* Copyright (c) 2009 Intel Corp.
|
||||
* Author: Huang Ying <ying.huang@intel.com>
|
||||
* Copyright 2021 Google LLC
|
||||
*/
|
||||
|
||||
/*
|
||||
* Glue code based on ghash-clmulni-intel_glue.c.
|
||||
*
|
||||
* This implementation of POLYVAL uses montgomery multiplication
|
||||
* accelerated by PCLMULQDQ-NI to implement the finite field
|
||||
* operations.
|
||||
*/
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/polyval.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include <asm/simd.h>
|
||||
|
||||
#define NUM_KEY_POWERS 8
|
||||
|
||||
struct polyval_tfm_ctx {
|
||||
/*
|
||||
* These powers must be in the order h^8, ..., h^1.
|
||||
*/
|
||||
u8 key_powers[NUM_KEY_POWERS][POLYVAL_BLOCK_SIZE];
|
||||
};
|
||||
|
||||
struct polyval_desc_ctx {
|
||||
u8 buffer[POLYVAL_BLOCK_SIZE];
|
||||
u32 bytes;
|
||||
};
|
||||
|
||||
asmlinkage void clmul_polyval_update(const struct polyval_tfm_ctx *keys,
|
||||
const u8 *in, size_t nblocks, u8 *accumulator);
|
||||
asmlinkage void clmul_polyval_mul(u8 *op1, const u8 *op2);
|
||||
|
||||
static void internal_polyval_update(const struct polyval_tfm_ctx *keys,
|
||||
const u8 *in, size_t nblocks, u8 *accumulator)
|
||||
{
|
||||
if (likely(crypto_simd_usable())) {
|
||||
kernel_fpu_begin();
|
||||
clmul_polyval_update(keys, in, nblocks, accumulator);
|
||||
kernel_fpu_end();
|
||||
} else {
|
||||
polyval_update_non4k(keys->key_powers[NUM_KEY_POWERS-1], in,
|
||||
nblocks, accumulator);
|
||||
}
|
||||
}
|
||||
|
||||
static void internal_polyval_mul(u8 *op1, const u8 *op2)
|
||||
{
|
||||
if (likely(crypto_simd_usable())) {
|
||||
kernel_fpu_begin();
|
||||
clmul_polyval_mul(op1, op2);
|
||||
kernel_fpu_end();
|
||||
} else {
|
||||
polyval_mul_non4k(op1, op2);
|
||||
}
|
||||
}
|
||||
|
||||
static int polyval_x86_setkey(struct crypto_shash *tfm,
|
||||
const u8 *key, unsigned int keylen)
|
||||
{
|
||||
struct polyval_tfm_ctx *tctx = crypto_shash_ctx(tfm);
|
||||
int i;
|
||||
|
||||
if (keylen != POLYVAL_BLOCK_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
memcpy(tctx->key_powers[NUM_KEY_POWERS-1], key, POLYVAL_BLOCK_SIZE);
|
||||
|
||||
for (i = NUM_KEY_POWERS-2; i >= 0; i--) {
|
||||
memcpy(tctx->key_powers[i], key, POLYVAL_BLOCK_SIZE);
|
||||
internal_polyval_mul(tctx->key_powers[i],
|
||||
tctx->key_powers[i+1]);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int polyval_x86_init(struct shash_desc *desc)
|
||||
{
|
||||
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
|
||||
memset(dctx, 0, sizeof(*dctx));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int polyval_x86_update(struct shash_desc *desc,
|
||||
const u8 *src, unsigned int srclen)
|
||||
{
|
||||
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
|
||||
u8 *pos;
|
||||
unsigned int nblocks;
|
||||
unsigned int n;
|
||||
|
||||
if (dctx->bytes) {
|
||||
n = min(srclen, dctx->bytes);
|
||||
pos = dctx->buffer + POLYVAL_BLOCK_SIZE - dctx->bytes;
|
||||
|
||||
dctx->bytes -= n;
|
||||
srclen -= n;
|
||||
|
||||
while (n--)
|
||||
*pos++ ^= *src++;
|
||||
|
||||
if (!dctx->bytes)
|
||||
internal_polyval_mul(dctx->buffer,
|
||||
tctx->key_powers[NUM_KEY_POWERS-1]);
|
||||
}
|
||||
|
||||
while (srclen >= POLYVAL_BLOCK_SIZE) {
|
||||
/* Allow rescheduling every 4K bytes. */
|
||||
nblocks = min(srclen, 4096U) / POLYVAL_BLOCK_SIZE;
|
||||
internal_polyval_update(tctx, src, nblocks, dctx->buffer);
|
||||
srclen -= nblocks * POLYVAL_BLOCK_SIZE;
|
||||
src += nblocks * POLYVAL_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
if (srclen) {
|
||||
dctx->bytes = POLYVAL_BLOCK_SIZE - srclen;
|
||||
pos = dctx->buffer;
|
||||
while (srclen--)
|
||||
*pos++ ^= *src++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int polyval_x86_final(struct shash_desc *desc, u8 *dst)
|
||||
{
|
||||
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
|
||||
|
||||
if (dctx->bytes) {
|
||||
internal_polyval_mul(dctx->buffer,
|
||||
tctx->key_powers[NUM_KEY_POWERS-1]);
|
||||
}
|
||||
|
||||
memcpy(dst, dctx->buffer, POLYVAL_BLOCK_SIZE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct shash_alg polyval_alg = {
|
||||
.digestsize = POLYVAL_DIGEST_SIZE,
|
||||
.init = polyval_x86_init,
|
||||
.update = polyval_x86_update,
|
||||
.final = polyval_x86_final,
|
||||
.setkey = polyval_x86_setkey,
|
||||
.descsize = sizeof(struct polyval_desc_ctx),
|
||||
.base = {
|
||||
.cra_name = "polyval",
|
||||
.cra_driver_name = "polyval-clmulni",
|
||||
.cra_priority = 200,
|
||||
.cra_blocksize = POLYVAL_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct polyval_tfm_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
};
|
||||
|
||||
__maybe_unused static const struct x86_cpu_id pcmul_cpu_id[] = {
|
||||
X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL),
|
||||
{}
|
||||
};
|
||||
MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);
|
||||
|
||||
static int __init polyval_clmulni_mod_init(void)
|
||||
{
|
||||
if (!x86_match_cpu(pcmul_cpu_id))
|
||||
return -ENODEV;
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_AVX))
|
||||
return -ENODEV;
|
||||
|
||||
return crypto_register_shash(&polyval_alg);
|
||||
}
|
||||
|
||||
static void __exit polyval_clmulni_mod_exit(void)
|
||||
{
|
||||
crypto_unregister_shash(&polyval_alg);
|
||||
}
|
||||
|
||||
module_init(polyval_clmulni_mod_init);
|
||||
module_exit(polyval_clmulni_mod_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("POLYVAL hash function accelerated by PCLMULQDQ-NI");
|
||||
MODULE_ALIAS_CRYPTO("polyval");
|
||||
MODULE_ALIAS_CRYPTO("polyval-clmulni");
|
@ -461,6 +461,15 @@ config CRYPTO_PCBC
|
||||
PCBC: Propagating Cipher Block Chaining mode
|
||||
This block cipher algorithm is required for RxRPC.
|
||||
|
||||
config CRYPTO_XCTR
|
||||
tristate
|
||||
select CRYPTO_SKCIPHER
|
||||
select CRYPTO_MANAGER
|
||||
help
|
||||
XCTR: XOR Counter mode. This blockcipher mode is a variant of CTR mode
|
||||
using XORs and little-endian addition rather than big-endian arithmetic.
|
||||
XCTR mode is used to implement HCTR2.
|
||||
|
||||
config CRYPTO_XTS
|
||||
tristate "XTS support"
|
||||
select CRYPTO_SKCIPHER
|
||||
@ -524,6 +533,17 @@ config CRYPTO_ADIANTUM
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config CRYPTO_HCTR2
|
||||
tristate "HCTR2 support"
|
||||
select CRYPTO_XCTR
|
||||
select CRYPTO_POLYVAL
|
||||
select CRYPTO_MANAGER
|
||||
help
|
||||
HCTR2 is a length-preserving encryption mode for storage encryption that
|
||||
is efficient on processors with instructions to accelerate AES and
|
||||
carryless multiplication, e.g. x86 processors with AES-NI and CLMUL, and
|
||||
ARM processors with the ARMv8 crypto extensions.
|
||||
|
||||
config CRYPTO_ESSIV
|
||||
tristate "ESSIV support for block encryption"
|
||||
select CRYPTO_AUTHENC
|
||||
@ -692,26 +712,8 @@ config CRYPTO_BLAKE2B
|
||||
|
||||
See https://blake2.net for further information.
|
||||
|
||||
config CRYPTO_BLAKE2S
|
||||
tristate "BLAKE2s digest algorithm"
|
||||
select CRYPTO_LIB_BLAKE2S_GENERIC
|
||||
select CRYPTO_HASH
|
||||
help
|
||||
Implementation of cryptographic hash function BLAKE2s
|
||||
optimized for 8-32bit platforms and can produce digests of any size
|
||||
between 1 to 32. The keyed hash is also implemented.
|
||||
|
||||
This module provides the following algorithms:
|
||||
|
||||
- blake2s-128
|
||||
- blake2s-160
|
||||
- blake2s-224
|
||||
- blake2s-256
|
||||
|
||||
See https://blake2.net for further information.
|
||||
|
||||
config CRYPTO_BLAKE2S_X86
|
||||
tristate "BLAKE2s digest algorithm (x86 accelerated version)"
|
||||
bool "BLAKE2s digest algorithm (x86 accelerated version)"
|
||||
depends on X86 && 64BIT
|
||||
select CRYPTO_LIB_BLAKE2S_GENERIC
|
||||
select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
|
||||
@ -765,6 +767,23 @@ config CRYPTO_GHASH
|
||||
GHASH is the hash function used in GCM (Galois/Counter Mode).
|
||||
It is not a general-purpose cryptographic hash function.
|
||||
|
||||
config CRYPTO_POLYVAL
|
||||
tristate
|
||||
select CRYPTO_GF128MUL
|
||||
select CRYPTO_HASH
|
||||
help
|
||||
POLYVAL is the hash function used in HCTR2. It is not a general-purpose
|
||||
cryptographic hash function.
|
||||
|
||||
config CRYPTO_POLYVAL_CLMUL_NI
|
||||
tristate "POLYVAL hash function (CLMUL-NI accelerated)"
|
||||
depends on X86 && 64BIT
|
||||
select CRYPTO_POLYVAL
|
||||
help
|
||||
This is the x86_64 CLMUL-NI accelerated implementation of POLYVAL. It is
|
||||
used to efficiently implement HCTR2 on x86-64 processors that support
|
||||
carry-less multiplication instructions.
|
||||
|
||||
config CRYPTO_POLY1305
|
||||
tristate "Poly1305 authenticator algorithm"
|
||||
select CRYPTO_HASH
|
||||
@ -1142,7 +1161,7 @@ config CRYPTO_AES_NI_INTEL
|
||||
In addition to AES cipher algorithm support, the acceleration
|
||||
for some popular block cipher mode is supported too, including
|
||||
ECB, CBC, LRW, XTS. The 64 bit version has additional
|
||||
acceleration for CTR.
|
||||
acceleration for CTR and XCTR.
|
||||
|
||||
config CRYPTO_AES_SPARC64
|
||||
tristate "AES cipher algorithms (SPARC64)"
|
||||
|
@ -84,7 +84,6 @@ obj-$(CONFIG_CRYPTO_STREEBOG) += streebog_generic.o
|
||||
obj-$(CONFIG_CRYPTO_WP512) += wp512.o
|
||||
CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
|
||||
obj-$(CONFIG_CRYPTO_BLAKE2B) += blake2b_generic.o
|
||||
obj-$(CONFIG_CRYPTO_BLAKE2S) += blake2s_generic.o
|
||||
obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o
|
||||
obj-$(CONFIG_CRYPTO_ECB) += ecb.o
|
||||
obj-$(CONFIG_CRYPTO_CBC) += cbc.o
|
||||
@ -94,6 +93,8 @@ obj-$(CONFIG_CRYPTO_CTS) += cts.o
|
||||
obj-$(CONFIG_CRYPTO_LRW) += lrw.o
|
||||
obj-$(CONFIG_CRYPTO_XTS) += xts.o
|
||||
obj-$(CONFIG_CRYPTO_CTR) += ctr.o
|
||||
obj-$(CONFIG_CRYPTO_XCTR) += xctr.o
|
||||
obj-$(CONFIG_CRYPTO_HCTR2) += hctr2.o
|
||||
obj-$(CONFIG_CRYPTO_KEYWRAP) += keywrap.o
|
||||
obj-$(CONFIG_CRYPTO_ADIANTUM) += adiantum.o
|
||||
obj-$(CONFIG_CRYPTO_NHPOLY1305) += nhpoly1305.o
|
||||
@ -171,6 +172,7 @@ UBSAN_SANITIZE_jitterentropy.o = n
|
||||
jitterentropy_rng-y := jitterentropy.o jitterentropy-kcapi.o
|
||||
obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o
|
||||
obj-$(CONFIG_CRYPTO_GHASH) += ghash-generic.o
|
||||
obj-$(CONFIG_CRYPTO_POLYVAL) += polyval-generic.o
|
||||
obj-$(CONFIG_CRYPTO_USER_API) += af_alg.o
|
||||
obj-$(CONFIG_CRYPTO_USER_API_HASH) += algif_hash.o
|
||||
obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
|
||||
|
@ -1,75 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
/*
|
||||
* shash interface to the generic implementation of BLAKE2s
|
||||
*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#include <crypto/internal/blake2s.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
static int crypto_blake2s_update_generic(struct shash_desc *desc,
|
||||
const u8 *in, unsigned int inlen)
|
||||
{
|
||||
return crypto_blake2s_update(desc, in, inlen, true);
|
||||
}
|
||||
|
||||
static int crypto_blake2s_final_generic(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
return crypto_blake2s_final(desc, out, true);
|
||||
}
|
||||
|
||||
#define BLAKE2S_ALG(name, driver_name, digest_size) \
|
||||
{ \
|
||||
.base.cra_name = name, \
|
||||
.base.cra_driver_name = driver_name, \
|
||||
.base.cra_priority = 100, \
|
||||
.base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \
|
||||
.base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \
|
||||
.base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \
|
||||
.base.cra_module = THIS_MODULE, \
|
||||
.digestsize = digest_size, \
|
||||
.setkey = crypto_blake2s_setkey, \
|
||||
.init = crypto_blake2s_init, \
|
||||
.update = crypto_blake2s_update_generic, \
|
||||
.final = crypto_blake2s_final_generic, \
|
||||
.descsize = sizeof(struct blake2s_state), \
|
||||
}
|
||||
|
||||
static struct shash_alg blake2s_algs[] = {
|
||||
BLAKE2S_ALG("blake2s-128", "blake2s-128-generic",
|
||||
BLAKE2S_128_HASH_SIZE),
|
||||
BLAKE2S_ALG("blake2s-160", "blake2s-160-generic",
|
||||
BLAKE2S_160_HASH_SIZE),
|
||||
BLAKE2S_ALG("blake2s-224", "blake2s-224-generic",
|
||||
BLAKE2S_224_HASH_SIZE),
|
||||
BLAKE2S_ALG("blake2s-256", "blake2s-256-generic",
|
||||
BLAKE2S_256_HASH_SIZE),
|
||||
};
|
||||
|
||||
static int __init blake2s_mod_init(void)
|
||||
{
|
||||
return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
|
||||
}
|
||||
|
||||
static void __exit blake2s_mod_exit(void)
|
||||
{
|
||||
crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
|
||||
}
|
||||
|
||||
subsys_initcall(blake2s_mod_init);
|
||||
module_exit(blake2s_mod_exit);
|
||||
|
||||
MODULE_ALIAS_CRYPTO("blake2s-128");
|
||||
MODULE_ALIAS_CRYPTO("blake2s-128-generic");
|
||||
MODULE_ALIAS_CRYPTO("blake2s-160");
|
||||
MODULE_ALIAS_CRYPTO("blake2s-160-generic");
|
||||
MODULE_ALIAS_CRYPTO("blake2s-224");
|
||||
MODULE_ALIAS_CRYPTO("blake2s-224-generic");
|
||||
MODULE_ALIAS_CRYPTO("blake2s-256");
|
||||
MODULE_ALIAS_CRYPTO("blake2s-256-generic");
|
||||
MODULE_LICENSE("GPL v2");
|
crypto/hctr2.c (new file, 581 lines)
@ -0,0 +1,581 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* HCTR2 length-preserving encryption mode
|
||||
*
|
||||
* Copyright 2021 Google LLC
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* HCTR2 is a length-preserving encryption mode that is efficient on
|
||||
* processors with instructions to accelerate AES and carryless
|
||||
* multiplication, e.g. x86 processors with AES-NI and CLMUL, and ARM
|
||||
* processors with the ARMv8 crypto extensions.
|
||||
*
|
||||
* For more details, see the paper: "Length-preserving encryption with HCTR2"
|
||||
* (https://eprint.iacr.org/2021/1441.pdf)
|
||||
*/
|
||||
|
||||
#include <crypto/internal/cipher.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/polyval.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#define BLOCKCIPHER_BLOCK_SIZE 16
|
||||
|
||||
/*
|
||||
* The specification allows variable-length tweaks, but Linux's crypto API
|
||||
* currently only allows algorithms to support a single length. The "natural"
|
||||
* tweak length for HCTR2 is 16, since that fits into one POLYVAL block for
|
||||
* the best performance. But longer tweaks are useful for fscrypt, to avoid
|
||||
* needing to derive per-file keys. So instead we use two blocks, or 32 bytes.
|
||||
*/
|
||||
#define TWEAK_SIZE 32
|
||||
|
||||
struct hctr2_instance_ctx {
|
||||
struct crypto_cipher_spawn blockcipher_spawn;
|
||||
struct crypto_skcipher_spawn xctr_spawn;
|
||||
struct crypto_shash_spawn polyval_spawn;
|
||||
};
|
||||
|
||||
struct hctr2_tfm_ctx {
|
||||
struct crypto_cipher *blockcipher;
|
||||
struct crypto_skcipher *xctr;
|
||||
struct crypto_shash *polyval;
|
||||
u8 L[BLOCKCIPHER_BLOCK_SIZE];
|
||||
int hashed_tweak_offset;
|
||||
/*
|
||||
* This struct is allocated with extra space for two exported hash
|
||||
* states. Since the hash state size is not known at compile-time, we
|
||||
* can't add these to the struct directly.
|
||||
*
|
||||
* hashed_tweaklen_divisible;
|
||||
* hashed_tweaklen_remainder;
|
||||
*/
|
||||
};
|
||||
|
||||
struct hctr2_request_ctx {
|
||||
u8 first_block[BLOCKCIPHER_BLOCK_SIZE];
|
||||
u8 xctr_iv[BLOCKCIPHER_BLOCK_SIZE];
|
||||
struct scatterlist *bulk_part_dst;
|
||||
struct scatterlist *bulk_part_src;
|
||||
struct scatterlist sg_src[2];
|
||||
struct scatterlist sg_dst[2];
|
||||
/*
|
||||
* Sub-request sizes are unknown at compile-time, so they need to go
|
||||
* after the members with known sizes.
|
||||
*/
|
||||
union {
|
||||
struct shash_desc hash_desc;
|
||||
struct skcipher_request xctr_req;
|
||||
} u;
|
||||
/*
|
||||
* This struct is allocated with extra space for one exported hash
|
||||
* state. Since the hash state size is not known at compile-time, we
|
||||
* can't add it to the struct directly.
|
||||
*
|
||||
* hashed_tweak;
|
||||
*/
|
||||
};
|
||||
|
||||
static inline u8 *hctr2_hashed_tweaklen(const struct hctr2_tfm_ctx *tctx,
|
||||
bool has_remainder)
|
||||
{
|
||||
u8 *p = (u8 *)tctx + sizeof(*tctx);
|
||||
|
||||
if (has_remainder) /* For messages not a multiple of block length */
|
||||
p += crypto_shash_statesize(tctx->polyval);
|
||||
return p;
|
||||
}
|
||||
|
||||
static inline u8 *hctr2_hashed_tweak(const struct hctr2_tfm_ctx *tctx,
|
||||
struct hctr2_request_ctx *rctx)
|
||||
{
|
||||
return (u8 *)rctx + tctx->hashed_tweak_offset;
|
||||
}
|
||||
|
||||
/*
|
||||
* The input data for each HCTR2 hash step begins with a 16-byte block that
|
||||
* contains the tweak length and a flag that indicates whether the input is evenly
|
||||
* divisible into blocks. Since this implementation only supports one tweak
|
||||
* length, we precompute the two hash states resulting from hashing the two
|
||||
* possible values of this initial block. This reduces by one block the amount of
|
||||
* data that needs to be hashed for each encryption/decryption.
|
||||
*
|
||||
* These precomputed hashes are stored in hctr2_tfm_ctx.
|
||||
*/
|
||||
static int hctr2_hash_tweaklen(struct hctr2_tfm_ctx *tctx, bool has_remainder)
|
||||
{
|
||||
SHASH_DESC_ON_STACK(shash, tctx->polyval);
|
||||
__le64 tweak_length_block[2];
|
||||
int err;
|
||||
|
||||
shash->tfm = tctx->polyval;
|
||||
memset(tweak_length_block, 0, sizeof(tweak_length_block));
|
||||
|
||||
tweak_length_block[0] = cpu_to_le64(TWEAK_SIZE * 8 * 2 + 2 + has_remainder);
|
||||
err = crypto_shash_init(shash);
|
||||
if (err)
|
||||
return err;
|
||||
err = crypto_shash_update(shash, (u8 *)tweak_length_block,
|
||||
POLYVAL_BLOCK_SIZE);
|
||||
if (err)
|
||||
return err;
|
||||
return crypto_shash_export(shash, hctr2_hashed_tweaklen(tctx, has_remainder));
|
||||
}
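/*
 * Worked example (illustrative, not part of the patch): with TWEAK_SIZE == 32,
 * the precomputed first block encodes
 *   32 * 8 * 2 + 2 + 0 = 514  when the message is a whole number of 16-byte blocks,
 *   32 * 8 * 2 + 2 + 1 = 515  when the message has a partial final block,
 * stored little-endian in tweak_length_block[0].
 */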
|
||||
|
||||
static int hctr2_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
|
||||
u8 hbar[BLOCKCIPHER_BLOCK_SIZE];
|
||||
int err;
|
||||
|
||||
crypto_cipher_clear_flags(tctx->blockcipher, CRYPTO_TFM_REQ_MASK);
|
||||
crypto_cipher_set_flags(tctx->blockcipher,
|
||||
crypto_skcipher_get_flags(tfm) &
|
||||
CRYPTO_TFM_REQ_MASK);
|
||||
err = crypto_cipher_setkey(tctx->blockcipher, key, keylen);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
crypto_skcipher_clear_flags(tctx->xctr, CRYPTO_TFM_REQ_MASK);
|
||||
crypto_skcipher_set_flags(tctx->xctr,
|
||||
crypto_skcipher_get_flags(tfm) &
|
||||
CRYPTO_TFM_REQ_MASK);
|
||||
err = crypto_skcipher_setkey(tctx->xctr, key, keylen);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
memset(hbar, 0, sizeof(hbar));
|
||||
crypto_cipher_encrypt_one(tctx->blockcipher, hbar, hbar);
|
||||
|
||||
memset(tctx->L, 0, sizeof(tctx->L));
|
||||
tctx->L[0] = 0x01;
|
||||
crypto_cipher_encrypt_one(tctx->blockcipher, tctx->L, tctx->L);
|
||||
|
||||
crypto_shash_clear_flags(tctx->polyval, CRYPTO_TFM_REQ_MASK);
|
||||
crypto_shash_set_flags(tctx->polyval, crypto_skcipher_get_flags(tfm) &
|
||||
CRYPTO_TFM_REQ_MASK);
|
||||
err = crypto_shash_setkey(tctx->polyval, hbar, BLOCKCIPHER_BLOCK_SIZE);
|
||||
if (err)
|
||||
return err;
|
||||
memzero_explicit(hbar, sizeof(hbar));
|
||||
|
||||
return hctr2_hash_tweaklen(tctx, true) ?: hctr2_hash_tweaklen(tctx, false);
|
||||
}
|
||||
|
||||
static int hctr2_hash_tweak(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
const struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
|
||||
struct hctr2_request_ctx *rctx = skcipher_request_ctx(req);
|
||||
struct shash_desc *hash_desc = &rctx->u.hash_desc;
|
||||
int err;
|
||||
bool has_remainder = req->cryptlen % POLYVAL_BLOCK_SIZE;
|
||||
|
||||
hash_desc->tfm = tctx->polyval;
|
||||
err = crypto_shash_import(hash_desc, hctr2_hashed_tweaklen(tctx, has_remainder));
|
||||
if (err)
|
||||
return err;
|
||||
err = crypto_shash_update(hash_desc, req->iv, TWEAK_SIZE);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
// Store the hashed tweak, since we need it when computing both
|
||||
// H(T || N) and H(T || V).
|
||||
return crypto_shash_export(hash_desc, hctr2_hashed_tweak(tctx, rctx));
|
||||
}
|
||||
|
||||
static int hctr2_hash_message(struct skcipher_request *req,
|
||||
struct scatterlist *sgl,
|
||||
u8 digest[POLYVAL_DIGEST_SIZE])
|
||||
{
|
||||
static const u8 padding[BLOCKCIPHER_BLOCK_SIZE] = { 0x1 };
|
||||
struct hctr2_request_ctx *rctx = skcipher_request_ctx(req);
|
||||
struct shash_desc *hash_desc = &rctx->u.hash_desc;
|
||||
const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE;
|
||||
struct sg_mapping_iter miter;
|
||||
unsigned int remainder = bulk_len % BLOCKCIPHER_BLOCK_SIZE;
|
||||
int i;
|
||||
int err = 0;
|
||||
int n = 0;
|
||||
|
||||
sg_miter_start(&miter, sgl, sg_nents(sgl),
|
||||
SG_MITER_FROM_SG | SG_MITER_ATOMIC);
|
||||
for (i = 0; i < bulk_len; i += n) {
|
||||
sg_miter_next(&miter);
|
||||
n = min_t(unsigned int, miter.length, bulk_len - i);
|
||||
err = crypto_shash_update(hash_desc, miter.addr, n);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
sg_miter_stop(&miter);
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (remainder) {
|
||||
err = crypto_shash_update(hash_desc, padding,
|
||||
BLOCKCIPHER_BLOCK_SIZE - remainder);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
return crypto_shash_final(hash_desc, digest);
|
||||
}
|
||||
|
||||
static int hctr2_finish(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
const struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
|
||||
struct hctr2_request_ctx *rctx = skcipher_request_ctx(req);
|
||||
u8 digest[POLYVAL_DIGEST_SIZE];
|
||||
struct shash_desc *hash_desc = &rctx->u.hash_desc;
|
||||
int err;
|
||||
|
||||
// U = UU ^ H(T || V)
|
||||
// or M = MM ^ H(T || N)
|
||||
hash_desc->tfm = tctx->polyval;
|
||||
err = crypto_shash_import(hash_desc, hctr2_hashed_tweak(tctx, rctx));
|
||||
if (err)
|
||||
return err;
|
||||
err = hctr2_hash_message(req, rctx->bulk_part_dst, digest);
|
||||
if (err)
|
||||
return err;
|
||||
crypto_xor(rctx->first_block, digest, BLOCKCIPHER_BLOCK_SIZE);
|
||||
|
||||
// Copy U (or M) into dst scatterlist
|
||||
scatterwalk_map_and_copy(rctx->first_block, req->dst,
|
||||
0, BLOCKCIPHER_BLOCK_SIZE, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void hctr2_xctr_done(struct crypto_async_request *areq,
|
||||
int err)
|
||||
{
|
||||
struct skcipher_request *req = areq->data;
|
||||
|
||||
if (!err)
|
||||
err = hctr2_finish(req);
|
||||
|
||||
skcipher_request_complete(req, err);
|
||||
}
|
||||
|
||||
static int hctr2_crypt(struct skcipher_request *req, bool enc)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
const struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
|
||||
struct hctr2_request_ctx *rctx = skcipher_request_ctx(req);
|
||||
u8 digest[POLYVAL_DIGEST_SIZE];
|
||||
int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE;
|
||||
int err;
|
||||
|
||||
// Requests must be at least one block
|
||||
if (req->cryptlen < BLOCKCIPHER_BLOCK_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
// Copy M (or U) into a temporary buffer
|
||||
scatterwalk_map_and_copy(rctx->first_block, req->src,
|
||||
0, BLOCKCIPHER_BLOCK_SIZE, 0);
|
||||
|
||||
// Create scatterlists for N and V
|
||||
rctx->bulk_part_src = scatterwalk_ffwd(rctx->sg_src, req->src,
|
||||
BLOCKCIPHER_BLOCK_SIZE);
|
||||
rctx->bulk_part_dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
|
||||
BLOCKCIPHER_BLOCK_SIZE);
|
||||
|
||||
// MM = M ^ H(T || N)
|
||||
// or UU = U ^ H(T || V)
|
||||
err = hctr2_hash_tweak(req);
|
||||
if (err)
|
||||
return err;
|
||||
err = hctr2_hash_message(req, rctx->bulk_part_src, digest);
|
||||
if (err)
|
||||
return err;
|
||||
crypto_xor(digest, rctx->first_block, BLOCKCIPHER_BLOCK_SIZE);
|
||||
|
||||
// UU = E(MM)
|
||||
// or MM = D(UU)
|
||||
if (enc)
|
||||
crypto_cipher_encrypt_one(tctx->blockcipher, rctx->first_block,
|
||||
digest);
|
||||
else
|
||||
crypto_cipher_decrypt_one(tctx->blockcipher, rctx->first_block,
|
||||
digest);
|
||||
|
||||
// S = MM ^ UU ^ L
|
||||
crypto_xor(digest, rctx->first_block, BLOCKCIPHER_BLOCK_SIZE);
|
||||
crypto_xor_cpy(rctx->xctr_iv, digest, tctx->L, BLOCKCIPHER_BLOCK_SIZE);
|
||||
|
||||
// V = XCTR(S, N)
|
||||
// or N = XCTR(S, V)
|
||||
skcipher_request_set_tfm(&rctx->u.xctr_req, tctx->xctr);
|
||||
skcipher_request_set_crypt(&rctx->u.xctr_req, rctx->bulk_part_src,
|
||||
rctx->bulk_part_dst, bulk_len,
|
||||
rctx->xctr_iv);
|
||||
skcipher_request_set_callback(&rctx->u.xctr_req,
|
||||
req->base.flags,
|
||||
hctr2_xctr_done, req);
|
||||
return crypto_skcipher_encrypt(&rctx->u.xctr_req) ?:
|
||||
hctr2_finish(req);
|
||||
}
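/*
 * Recap (assembled from the step comments above, not part of the patch) of the
 * dataflow implemented by hctr2_crypt() and hctr2_finish() for encryption;
 * decryption swaps E/D and the roles of M/N and U/V:
 *
 *   MM = M ^ H(T || N)
 *   UU = E(MM)
 *   S  = MM ^ UU ^ L
 *   V  = XCTR_S(N)
 *   U  = UU ^ H(T || V)
 *
 * where M is the first 16-byte block of the message, N is the remainder, T is
 * the tweak, E is the block cipher, and H is POLYVAL.
 */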
|
||||
|
||||
static int hctr2_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
return hctr2_crypt(req, true);
|
||||
}
|
||||
|
||||
static int hctr2_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
return hctr2_crypt(req, false);
|
||||
}
|
||||
|
||||
static int hctr2_init_tfm(struct crypto_skcipher *tfm)
|
||||
{
|
||||
struct skcipher_instance *inst = skcipher_alg_instance(tfm);
|
||||
struct hctr2_instance_ctx *ictx = skcipher_instance_ctx(inst);
|
||||
struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
|
||||
struct crypto_skcipher *xctr;
|
||||
struct crypto_cipher *blockcipher;
|
||||
struct crypto_shash *polyval;
|
||||
unsigned int subreq_size;
|
||||
int err;
|
||||
|
||||
xctr = crypto_spawn_skcipher(&ictx->xctr_spawn);
|
||||
if (IS_ERR(xctr))
|
||||
return PTR_ERR(xctr);
|
||||
|
||||
blockcipher = crypto_spawn_cipher(&ictx->blockcipher_spawn);
|
||||
if (IS_ERR(blockcipher)) {
|
||||
err = PTR_ERR(blockcipher);
|
||||
goto err_free_xctr;
|
||||
}
|
||||
|
||||
polyval = crypto_spawn_shash(&ictx->polyval_spawn);
|
||||
if (IS_ERR(polyval)) {
|
||||
err = PTR_ERR(polyval);
|
||||
goto err_free_blockcipher;
|
||||
}
|
||||
|
||||
tctx->xctr = xctr;
|
||||
tctx->blockcipher = blockcipher;
|
||||
tctx->polyval = polyval;
|
||||
|
||||
BUILD_BUG_ON(offsetofend(struct hctr2_request_ctx, u) !=
|
||||
sizeof(struct hctr2_request_ctx));
|
||||
subreq_size = max(sizeof_field(struct hctr2_request_ctx, u.hash_desc) +
|
||||
crypto_shash_descsize(polyval),
|
||||
sizeof_field(struct hctr2_request_ctx, u.xctr_req) +
|
||||
crypto_skcipher_reqsize(xctr));
|
||||
|
||||
tctx->hashed_tweak_offset = offsetof(struct hctr2_request_ctx, u) +
|
||||
subreq_size;
|
||||
crypto_skcipher_set_reqsize(tfm, tctx->hashed_tweak_offset +
|
||||
crypto_shash_statesize(polyval));
|
||||
return 0;
|
||||
|
||||
err_free_blockcipher:
|
||||
crypto_free_cipher(blockcipher);
|
||||
err_free_xctr:
|
||||
crypto_free_skcipher(xctr);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void hctr2_exit_tfm(struct crypto_skcipher *tfm)
|
||||
{
|
||||
struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
|
||||
|
||||
crypto_free_cipher(tctx->blockcipher);
|
||||
crypto_free_skcipher(tctx->xctr);
|
||||
crypto_free_shash(tctx->polyval);
|
||||
}
|
||||
|
||||
static void hctr2_free_instance(struct skcipher_instance *inst)
|
||||
{
|
||||
struct hctr2_instance_ctx *ictx = skcipher_instance_ctx(inst);
|
||||
|
||||
crypto_drop_cipher(&ictx->blockcipher_spawn);
|
||||
crypto_drop_skcipher(&ictx->xctr_spawn);
|
||||
crypto_drop_shash(&ictx->polyval_spawn);
|
||||
kfree(inst);
|
||||
}
|
||||
|
||||
static int hctr2_create_common(struct crypto_template *tmpl,
|
||||
struct rtattr **tb,
|
||||
const char *xctr_name,
|
||||
const char *polyval_name)
|
||||
{
|
||||
u32 mask;
|
||||
struct skcipher_instance *inst;
|
||||
struct hctr2_instance_ctx *ictx;
|
||||
struct skcipher_alg *xctr_alg;
|
||||
struct crypto_alg *blockcipher_alg;
|
||||
struct shash_alg *polyval_alg;
|
||||
char blockcipher_name[CRYPTO_MAX_ALG_NAME];
|
||||
int len;
|
||||
int err;
|
||||
|
||||
err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER, &mask);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL);
|
||||
if (!inst)
|
||||
return -ENOMEM;
|
||||
ictx = skcipher_instance_ctx(inst);
|
||||
|
||||
/* Stream cipher, xctr(block_cipher) */
|
||||
err = crypto_grab_skcipher(&ictx->xctr_spawn,
|
||||
skcipher_crypto_instance(inst),
|
||||
xctr_name, 0, mask);
|
||||
if (err)
|
||||
goto err_free_inst;
|
||||
xctr_alg = crypto_spawn_skcipher_alg(&ictx->xctr_spawn);
|
||||
|
||||
err = -EINVAL;
|
||||
if (strncmp(xctr_alg->base.cra_name, "xctr(", 5))
|
||||
goto err_free_inst;
|
||||
len = strscpy(blockcipher_name, xctr_alg->base.cra_name + 5,
|
||||
sizeof(blockcipher_name));
|
||||
if (len < 1)
|
||||
goto err_free_inst;
|
||||
if (blockcipher_name[len - 1] != ')')
|
||||
goto err_free_inst;
|
||||
blockcipher_name[len - 1] = 0;
|
||||
|
||||
/* Block cipher, e.g. "aes" */
|
||||
err = crypto_grab_cipher(&ictx->blockcipher_spawn,
|
||||
skcipher_crypto_instance(inst),
|
||||
blockcipher_name, 0, mask);
|
||||
if (err)
|
||||
goto err_free_inst;
|
||||
blockcipher_alg = crypto_spawn_cipher_alg(&ictx->blockcipher_spawn);
|
||||
|
||||
/* Require blocksize of 16 bytes */
|
||||
err = -EINVAL;
|
||||
if (blockcipher_alg->cra_blocksize != BLOCKCIPHER_BLOCK_SIZE)
|
||||
goto err_free_inst;
|
||||
|
||||
/* Polyval ε-∆U hash function */
|
||||
err = crypto_grab_shash(&ictx->polyval_spawn,
|
||||
skcipher_crypto_instance(inst),
|
||||
polyval_name, 0, mask);
|
||||
if (err)
|
||||
goto err_free_inst;
|
||||
polyval_alg = crypto_spawn_shash_alg(&ictx->polyval_spawn);
|
||||
|
||||
/* Ensure Polyval is being used */
|
||||
err = -EINVAL;
|
||||
if (strcmp(polyval_alg->base.cra_name, "polyval") != 0)
|
||||
goto err_free_inst;
|
||||
|
||||
/* Instance fields */
|
||||
|
||||
err = -ENAMETOOLONG;
|
||||
if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "hctr2(%s)",
|
||||
blockcipher_alg->cra_name) >= CRYPTO_MAX_ALG_NAME)
|
||||
goto err_free_inst;
|
||||
if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
|
||||
"hctr2_base(%s,%s)",
|
||||
xctr_alg->base.cra_driver_name,
|
||||
polyval_alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
|
||||
goto err_free_inst;
|
||||
|
||||
inst->alg.base.cra_blocksize = BLOCKCIPHER_BLOCK_SIZE;
|
||||
inst->alg.base.cra_ctxsize = sizeof(struct hctr2_tfm_ctx) +
|
||||
polyval_alg->statesize * 2;
|
||||
inst->alg.base.cra_alignmask = xctr_alg->base.cra_alignmask |
|
||||
polyval_alg->base.cra_alignmask;
|
||||
/*
|
||||
* The hash function is called twice, so it is weighted higher than the
|
||||
* xctr and blockcipher.
|
||||
*/
|
||||
inst->alg.base.cra_priority = (2 * xctr_alg->base.cra_priority +
|
||||
4 * polyval_alg->base.cra_priority +
|
||||
blockcipher_alg->cra_priority) / 7;
|
||||
|
||||
inst->alg.setkey = hctr2_setkey;
|
||||
inst->alg.encrypt = hctr2_encrypt;
|
||||
inst->alg.decrypt = hctr2_decrypt;
|
||||
inst->alg.init = hctr2_init_tfm;
|
||||
inst->alg.exit = hctr2_exit_tfm;
|
||||
inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(xctr_alg);
|
||||
inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(xctr_alg);
|
||||
inst->alg.ivsize = TWEAK_SIZE;
|
||||
|
||||
inst->free = hctr2_free_instance;
|
||||
|
||||
err = skcipher_register_instance(tmpl, inst);
|
||||
if (err) {
|
||||
err_free_inst:
|
||||
hctr2_free_instance(inst);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int hctr2_create_base(struct crypto_template *tmpl, struct rtattr **tb)
|
||||
{
|
||||
const char *xctr_name;
|
||||
const char *polyval_name;
|
||||
|
||||
xctr_name = crypto_attr_alg_name(tb[1]);
|
||||
if (IS_ERR(xctr_name))
|
||||
return PTR_ERR(xctr_name);
|
||||
|
||||
polyval_name = crypto_attr_alg_name(tb[2]);
|
||||
if (IS_ERR(polyval_name))
|
||||
return PTR_ERR(polyval_name);
|
||||
|
||||
return hctr2_create_common(tmpl, tb, xctr_name, polyval_name);
|
||||
}
|
||||
|
||||
static int hctr2_create(struct crypto_template *tmpl, struct rtattr **tb)
|
||||
{
|
||||
const char *blockcipher_name;
|
||||
char xctr_name[CRYPTO_MAX_ALG_NAME];
|
||||
|
||||
blockcipher_name = crypto_attr_alg_name(tb[1]);
|
||||
if (IS_ERR(blockcipher_name))
|
||||
return PTR_ERR(blockcipher_name);
|
||||
|
||||
if (snprintf(xctr_name, CRYPTO_MAX_ALG_NAME, "xctr(%s)",
|
||||
blockcipher_name) >= CRYPTO_MAX_ALG_NAME)
|
||||
return -ENAMETOOLONG;
|
||||
|
||||
return hctr2_create_common(tmpl, tb, xctr_name, "polyval");
|
||||
}
|
||||
|
||||
static struct crypto_template hctr2_tmpls[] = {
|
||||
{
|
||||
/* hctr2_base(xctr_name, polyval_name) */
|
||||
.name = "hctr2_base",
|
||||
.create = hctr2_create_base,
|
||||
.module = THIS_MODULE,
|
||||
}, {
|
||||
/* hctr2(blockcipher_name) */
|
||||
.name = "hctr2",
|
||||
.create = hctr2_create,
|
||||
.module = THIS_MODULE,
|
||||
}
|
||||
};
|
||||
|
||||
static int __init hctr2_module_init(void)
|
||||
{
|
||||
return crypto_register_templates(hctr2_tmpls, ARRAY_SIZE(hctr2_tmpls));
|
||||
}
|
||||
|
||||
static void __exit hctr2_module_exit(void)
|
||||
{
|
||||
return crypto_unregister_templates(hctr2_tmpls,
|
||||
ARRAY_SIZE(hctr2_tmpls));
|
||||
}
|
||||
|
||||
subsys_initcall(hctr2_module_init);
|
||||
module_exit(hctr2_module_exit);
|
||||
|
||||
MODULE_DESCRIPTION("HCTR2 length-preserving encryption mode");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_ALIAS_CRYPTO("hctr2");
|
||||
MODULE_IMPORT_NS(CRYPTO_INTERNAL);
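/*
 * Usage sketch (illustrative, not part of the patch): encrypting one buffer
 * with the "hctr2(aes)" template through the skcipher API. Error handling is
 * abbreviated; names and lengths are examples only. len must be at least one
 * 16-byte block, and the 32-byte IV carries the HCTR2 tweak (TWEAK_SIZE).
 */
#include <crypto/skcipher.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>

static int hctr2_encrypt_example(const u8 *key, unsigned int keylen,
				 u8 *buf, unsigned int len, u8 iv[32])
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	tfm = crypto_alloc_skcipher("hctr2(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, keylen);
	if (err)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	sg_init_one(&sg, buf, len);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				      crypto_req_done, &wait);
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}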
|
crypto/polyval-generic.c (new file, 245 lines)
@ -0,0 +1,245 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * POLYVAL: hash function for HCTR2.
 *
 * Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen <mh1@iki.fi>
 * Copyright (c) 2009 Intel Corp.
 *   Author: Huang Ying <ying.huang@intel.com>
 * Copyright 2021 Google LLC
 */

/*
 * Code based on crypto/ghash-generic.c
 *
 * POLYVAL is a keyed hash function similar to GHASH. POLYVAL uses a different
 * modulus for finite field multiplication which makes hardware accelerated
 * implementations on little-endian machines faster. POLYVAL is used in the
 * kernel to implement HCTR2, but was originally specified for AES-GCM-SIV
 * (RFC 8452).
 *
 * For more information see:
 * Length-preserving encryption with HCTR2:
 *   https://eprint.iacr.org/2021/1441.pdf
 * AES-GCM-SIV: Nonce Misuse-Resistant Authenticated Encryption:
 *   https://datatracker.ietf.org/doc/html/rfc8452
 *
 * Like GHASH, POLYVAL is not a cryptographic hash function and should
 * not be used outside of crypto modes explicitly designed to use POLYVAL.
 *
 * This implementation uses a convenient trick involving the GHASH and POLYVAL
 * fields. This trick allows multiplication in the POLYVAL field to be
 * implemented by using multiplication in the GHASH field as a subroutine. An
 * element of the POLYVAL field can be converted to an element of the GHASH
 * field by computing x*REVERSE(a), where REVERSE reverses the byte-ordering of
 * a. Similarly, an element of the GHASH field can be converted back to the
 * POLYVAL field by computing REVERSE(x^{-1}*a). For more information, see:
 * https://datatracker.ietf.org/doc/html/rfc8452#appendix-A
 *
 * By using this trick, we do not need to implement the POLYVAL field for the
 * generic implementation.
 *
 * Warning: this generic implementation is not intended to be used in practice
 * and is not constant time. For practical use, a hardware accelerated
 * implementation of POLYVAL should be used instead.
 */

#include <asm/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/gf128mul.h>
#include <crypto/polyval.h>
#include <crypto/internal/hash.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>

struct polyval_tfm_ctx {
	struct gf128mul_4k *gf128;
};

struct polyval_desc_ctx {
	union {
		u8 buffer[POLYVAL_BLOCK_SIZE];
		be128 buffer128;
	};
	u32 bytes;
};

static void copy_and_reverse(u8 dst[POLYVAL_BLOCK_SIZE],
			     const u8 src[POLYVAL_BLOCK_SIZE])
{
	u64 a = get_unaligned((const u64 *)&src[0]);
	u64 b = get_unaligned((const u64 *)&src[8]);

	put_unaligned(swab64(a), (u64 *)&dst[8]);
	put_unaligned(swab64(b), (u64 *)&dst[0]);
}
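
/*
 * Editor's sketch (not part of the patch): the POLYVAL -> GHASH field
 * conversion described in the header comment above, written out as a
 * helper.  The name is illustrative only; it reuses copy_and_reverse()
 * from this file and gf128mul_x_lle() from <crypto/gf128mul.h>, and is
 * exactly the mapping applied to the hash key in polyval_setkey() below.
 */
static inline void polyval_to_ghash_sketch(be128 *out,
					   const u8 in[POLYVAL_BLOCK_SIZE])
{
	copy_and_reverse((u8 *)out, in);	/* REVERSE(a)     */
	gf128mul_x_lle(out, out);		/* x * REVERSE(a) */
}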

/*
 * Performs multiplication in the POLYVAL field using the GHASH field as a
 * subroutine. This function is used as a fallback for hardware accelerated
 * implementations when simd registers are unavailable.
 *
 * Note: This function is not used for polyval-generic, instead we use the 4k
 * lookup table implementation for finite field multiplication.
 */
void polyval_mul_non4k(u8 *op1, const u8 *op2)
{
	be128 a, b;

	// Assume one argument is in Montgomery form and one is not.
	copy_and_reverse((u8 *)&a, op1);
	copy_and_reverse((u8 *)&b, op2);
	gf128mul_x_lle(&a, &a);
	gf128mul_lle(&a, &b);
	copy_and_reverse(op1, (u8 *)&a);
}
EXPORT_SYMBOL_GPL(polyval_mul_non4k);

/*
 * Perform a POLYVAL update using non4k multiplication. This function is used
 * as a fallback for hardware accelerated implementations when simd registers
 * are unavailable.
 *
 * Note: This function is not used for polyval-generic, instead we use the 4k
 * lookup table implementation of finite field multiplication.
 */
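/*
 * Editor's note (not part of the patch): the loop below is Horner's rule
 * for the POLYVAL dot product.  Starting from a zero accumulator acc and
 * hash key h, each step computes acc = (acc ^ m_i) * h, so after n blocks
 *
 *	acc = m_1*h^n ^ m_2*h^(n-1) ^ ... ^ m_n*h
 *
 * which is the POLYVAL sum from RFC 8452, with the field-convention
 * differences absorbed by polyval_mul_non4k().
 */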
void polyval_update_non4k(const u8 *key, const u8 *in,
			  size_t nblocks, u8 *accumulator)
{
	while (nblocks--) {
		crypto_xor(accumulator, in, POLYVAL_BLOCK_SIZE);
		polyval_mul_non4k(accumulator, key);
		in += POLYVAL_BLOCK_SIZE;
	}
}
EXPORT_SYMBOL_GPL(polyval_update_non4k);

static int polyval_setkey(struct crypto_shash *tfm,
			  const u8 *key, unsigned int keylen)
{
	struct polyval_tfm_ctx *ctx = crypto_shash_ctx(tfm);
	be128 k;

	if (keylen != POLYVAL_BLOCK_SIZE)
		return -EINVAL;

	gf128mul_free_4k(ctx->gf128);

	BUILD_BUG_ON(sizeof(k) != POLYVAL_BLOCK_SIZE);
	copy_and_reverse((u8 *)&k, key);
	gf128mul_x_lle(&k, &k);

	ctx->gf128 = gf128mul_init_4k_lle(&k);
	memzero_explicit(&k, POLYVAL_BLOCK_SIZE);

	if (!ctx->gf128)
		return -ENOMEM;

	return 0;
}

static int polyval_init(struct shash_desc *desc)
{
	struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);

	memset(dctx, 0, sizeof(*dctx));

	return 0;
}

static int polyval_update(struct shash_desc *desc,
			  const u8 *src, unsigned int srclen)
{
	struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
	const struct polyval_tfm_ctx *ctx = crypto_shash_ctx(desc->tfm);
	u8 *pos;
	u8 tmp[POLYVAL_BLOCK_SIZE];
	int n;

	if (dctx->bytes) {
		n = min(srclen, dctx->bytes);
		pos = dctx->buffer + dctx->bytes - 1;

		dctx->bytes -= n;
		srclen -= n;

		while (n--)
			*pos-- ^= *src++;

		if (!dctx->bytes)
			gf128mul_4k_lle(&dctx->buffer128, ctx->gf128);
	}

	while (srclen >= POLYVAL_BLOCK_SIZE) {
		copy_and_reverse(tmp, src);
		crypto_xor(dctx->buffer, tmp, POLYVAL_BLOCK_SIZE);
		gf128mul_4k_lle(&dctx->buffer128, ctx->gf128);
		src += POLYVAL_BLOCK_SIZE;
		srclen -= POLYVAL_BLOCK_SIZE;
	}

	if (srclen) {
		dctx->bytes = POLYVAL_BLOCK_SIZE - srclen;
		pos = dctx->buffer + POLYVAL_BLOCK_SIZE - 1;
		while (srclen--)
			*pos-- ^= *src++;
	}

	return 0;
}

static int polyval_final(struct shash_desc *desc, u8 *dst)
{
	struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
	const struct polyval_tfm_ctx *ctx = crypto_shash_ctx(desc->tfm);

	if (dctx->bytes)
		gf128mul_4k_lle(&dctx->buffer128, ctx->gf128);
	copy_and_reverse(dst, dctx->buffer);
	return 0;
}

static void polyval_exit_tfm(struct crypto_tfm *tfm)
{
	struct polyval_tfm_ctx *ctx = crypto_tfm_ctx(tfm);

	gf128mul_free_4k(ctx->gf128);
}

static struct shash_alg polyval_alg = {
	.digestsize	= POLYVAL_DIGEST_SIZE,
	.init		= polyval_init,
	.update		= polyval_update,
	.final		= polyval_final,
	.setkey		= polyval_setkey,
	.descsize	= sizeof(struct polyval_desc_ctx),
	.base		= {
		.cra_name		= "polyval",
		.cra_driver_name	= "polyval-generic",
		.cra_priority		= 100,
		.cra_blocksize		= POLYVAL_BLOCK_SIZE,
		.cra_ctxsize		= sizeof(struct polyval_tfm_ctx),
		.cra_module		= THIS_MODULE,
		.cra_exit		= polyval_exit_tfm,
	},
};

static int __init polyval_mod_init(void)
{
	return crypto_register_shash(&polyval_alg);
}

static void __exit polyval_mod_exit(void)
{
	crypto_unregister_shash(&polyval_alg);
}

subsys_initcall(polyval_mod_init);
module_exit(polyval_mod_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("POLYVAL hash function");
MODULE_ALIAS_CRYPTO("polyval");
MODULE_ALIAS_CRYPTO("polyval-generic");
crypto/rsa.c | 78
@@ -17,6 +17,11 @@ struct rsa_mpi_key {
|
||||
MPI n;
|
||||
MPI e;
|
||||
MPI d;
|
||||
MPI p;
|
||||
MPI q;
|
||||
MPI dp;
|
||||
MPI dq;
|
||||
MPI qinv;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -35,16 +40,49 @@ static int _rsa_enc(const struct rsa_mpi_key *key, MPI c, MPI m)
|
||||
|
||||
/*
 * RSADP function [RFC3447 sec 5.1.2]
 * m = c^d mod n;
 * m_1 = c^dP mod p;
 * m_2 = c^dQ mod q;
 * h = (m_1 - m_2) * qInv mod p;
 * m = m_2 + q * h;
 */
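/*
 * Editor's example (not part of the patch): a toy check of the CRT
 * formulas above.  With p = 11, q = 13, n = 143, e = 7 and d = 43 we get
 * dP = d mod (p-1) = 3, dQ = d mod (q-1) = 7 and qInv = 13^-1 mod 11 = 6.
 * Encrypting m = 42 gives c = 42^7 mod 143 = 81, and decrypting via CRT:
 *
 *	m_1 = 81^3 mod 11 = 9
 *	m_2 = 81^7 mod 13 = 3
 *	h   = (9 - 3) * 6 mod 11 = 3
 *	m   = 3 + 13 * 3 = 42
 *
 * which recovers the original message without ever using d directly.
 */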
|
||||
static int _rsa_dec(const struct rsa_mpi_key *key, MPI m, MPI c)
|
||||
static int _rsa_dec_crt(const struct rsa_mpi_key *key, MPI m_or_m1_or_h, MPI c)
|
||||
{
|
||||
MPI m2, m12_or_qh;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
/* (1) Validate 0 <= c < n */
|
||||
if (mpi_cmp_ui(c, 0) < 0 || mpi_cmp(c, key->n) >= 0)
|
||||
return -EINVAL;
|
||||
|
||||
/* (2) m = c^d mod n */
|
||||
return mpi_powm(m, c, key->d, key->n);
|
||||
m2 = mpi_alloc(0);
|
||||
m12_or_qh = mpi_alloc(0);
|
||||
if (!m2 || !m12_or_qh)
|
||||
goto err_free_mpi;
|
||||
|
||||
/* (2i) m_1 = c^dP mod p */
|
||||
ret = mpi_powm(m_or_m1_or_h, c, key->dp, key->p);
|
||||
if (ret)
|
||||
goto err_free_mpi;
|
||||
|
||||
/* (2i) m_2 = c^dQ mod q */
|
||||
ret = mpi_powm(m2, c, key->dq, key->q);
|
||||
if (ret)
|
||||
goto err_free_mpi;
|
||||
|
||||
/* (2iii) h = (m_1 - m_2) * qInv mod p */
|
||||
mpi_sub(m12_or_qh, m_or_m1_or_h, m2);
|
||||
mpi_mulm(m_or_m1_or_h, m12_or_qh, key->qinv, key->p);
|
||||
|
||||
/* (2iv) m = m_2 + q * h */
|
||||
mpi_mul(m12_or_qh, key->q, m_or_m1_or_h);
|
||||
mpi_addm(m_or_m1_or_h, m2, m12_or_qh, key->n);
|
||||
|
||||
ret = 0;
|
||||
|
||||
err_free_mpi:
|
||||
mpi_free(m12_or_qh);
|
||||
mpi_free(m2);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline struct rsa_mpi_key *rsa_get_key(struct crypto_akcipher *tfm)
|
||||
@ -112,7 +150,7 @@ static int rsa_dec(struct akcipher_request *req)
|
||||
if (!c)
|
||||
goto err_free_m;
|
||||
|
||||
ret = _rsa_dec(pkey, m, c);
|
||||
ret = _rsa_dec_crt(pkey, m, c);
|
||||
if (ret)
|
||||
goto err_free_c;
|
||||
|
||||
@ -134,9 +172,19 @@ static void rsa_free_mpi_key(struct rsa_mpi_key *key)
|
||||
mpi_free(key->d);
|
||||
mpi_free(key->e);
|
||||
mpi_free(key->n);
|
||||
mpi_free(key->p);
|
||||
mpi_free(key->q);
|
||||
mpi_free(key->dp);
|
||||
mpi_free(key->dq);
|
||||
mpi_free(key->qinv);
|
||||
key->d = NULL;
|
||||
key->e = NULL;
|
||||
key->n = NULL;
|
||||
key->p = NULL;
|
||||
key->q = NULL;
|
||||
key->dp = NULL;
|
||||
key->dq = NULL;
|
||||
key->qinv = NULL;
|
||||
}
|
||||
|
||||
static int rsa_check_key_length(unsigned int len)
|
||||
@ -217,6 +265,26 @@ static int rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key,
|
||||
if (!mpi_key->n)
|
||||
goto err;
|
||||
|
||||
mpi_key->p = mpi_read_raw_data(raw_key.p, raw_key.p_sz);
|
||||
if (!mpi_key->p)
|
||||
goto err;
|
||||
|
||||
mpi_key->q = mpi_read_raw_data(raw_key.q, raw_key.q_sz);
|
||||
if (!mpi_key->q)
|
||||
goto err;
|
||||
|
||||
mpi_key->dp = mpi_read_raw_data(raw_key.dp, raw_key.dp_sz);
|
||||
if (!mpi_key->dp)
|
||||
goto err;
|
||||
|
||||
mpi_key->dq = mpi_read_raw_data(raw_key.dq, raw_key.dq_sz);
|
||||
if (!mpi_key->dq)
|
||||
goto err;
|
||||
|
||||
mpi_key->qinv = mpi_read_raw_data(raw_key.qinv, raw_key.qinv_sz);
|
||||
if (!mpi_key->qinv)
|
||||
goto err;
|
||||
|
||||
if (rsa_check_key_length(mpi_get_size(mpi_key->n) << 3)) {
|
||||
rsa_free_mpi_key(mpi_key);
|
||||
return -EINVAL;
|
||||
|
@ -1556,6 +1556,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
ret += tcrypt_test("rfc3686(ctr(aes))");
|
||||
ret += tcrypt_test("ofb(aes)");
|
||||
ret += tcrypt_test("cfb(aes)");
|
||||
ret += tcrypt_test("xctr(aes)");
|
||||
break;
|
||||
|
||||
case 11:
|
||||
@ -1669,10 +1670,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
ret += tcrypt_test("rmd160");
|
||||
break;
|
||||
|
||||
case 41:
|
||||
ret += tcrypt_test("blake2s-256");
|
||||
break;
|
||||
|
||||
case 42:
|
||||
ret += tcrypt_test("blake2b-512");
|
||||
break;
|
||||
@ -1729,6 +1726,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
ret += tcrypt_test("ccm(sm4)");
|
||||
break;
|
||||
|
||||
case 57:
|
||||
ret += tcrypt_test("polyval");
|
||||
break;
|
||||
|
||||
case 100:
|
||||
ret += tcrypt_test("hmac(md5)");
|
||||
break;
|
||||
@ -2186,6 +2187,11 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
16, 16, aead_speed_template_19, num_mb);
|
||||
break;
|
||||
|
||||
case 226:
|
||||
test_cipher_speed("hctr2(aes)", ENCRYPT, sec, NULL,
|
||||
0, speed_template_32);
|
||||
break;
|
||||
|
||||
case 300:
|
||||
if (alg) {
|
||||
test_hash_speed(alg, sec, generic_hash_speed_template);
|
||||
@ -2240,10 +2246,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
test_hash_speed("rmd160", sec, generic_hash_speed_template);
|
||||
if (mode > 300 && mode < 400) break;
|
||||
fallthrough;
|
||||
case 316:
|
||||
test_hash_speed("blake2s-256", sec, generic_hash_speed_template);
|
||||
if (mode > 300 && mode < 400) break;
|
||||
fallthrough;
|
||||
case 317:
|
||||
test_hash_speed("blake2b-512", sec, generic_hash_speed_template);
|
||||
if (mode > 300 && mode < 400) break;
|
||||
@ -2352,10 +2354,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
test_ahash_speed("rmd160", sec, generic_hash_speed_template);
|
||||
if (mode > 400 && mode < 500) break;
|
||||
fallthrough;
|
||||
case 416:
|
||||
test_ahash_speed("blake2s-256", sec, generic_hash_speed_template);
|
||||
if (mode > 400 && mode < 500) break;
|
||||
fallthrough;
|
||||
case 417:
|
||||
test_ahash_speed("blake2b-512", sec, generic_hash_speed_template);
|
||||
if (mode > 400 && mode < 500) break;
|
||||
|
@ -4375,30 +4375,6 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
.suite = {
|
||||
.hash = __VECS(blake2b_512_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "blake2s-128",
|
||||
.test = alg_test_hash,
|
||||
.suite = {
|
||||
.hash = __VECS(blakes2s_128_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "blake2s-160",
|
||||
.test = alg_test_hash,
|
||||
.suite = {
|
||||
.hash = __VECS(blakes2s_160_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "blake2s-224",
|
||||
.test = alg_test_hash,
|
||||
.suite = {
|
||||
.hash = __VECS(blakes2s_224_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "blake2s-256",
|
||||
.test = alg_test_hash,
|
||||
.suite = {
|
||||
.hash = __VECS(blakes2s_256_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "cbc(aes)",
|
||||
.test = alg_test_skcipher,
|
||||
@ -5088,6 +5064,14 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
.suite = {
|
||||
.hash = __VECS(ghash_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "hctr2(aes)",
|
||||
.generic_driver =
|
||||
"hctr2_base(xctr(aes-generic),polyval-generic)",
|
||||
.test = alg_test_skcipher,
|
||||
.suite = {
|
||||
.cipher = __VECS(aes_hctr2_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "hmac(md5)",
|
||||
.test = alg_test_hash,
|
||||
@ -5342,6 +5326,12 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
.suite = {
|
||||
.hash = __VECS(poly1305_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "polyval",
|
||||
.test = alg_test_hash,
|
||||
.suite = {
|
||||
.hash = __VECS(polyval_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "rfc3686(ctr(aes))",
|
||||
.test = alg_test_skcipher,
|
||||
@ -5548,6 +5538,12 @@ static const struct alg_test_desc alg_test_descs[] = {
|
||||
.suite = {
|
||||
.cipher = __VECS(xchacha20_tv_template)
|
||||
},
|
||||
}, {
|
||||
.alg = "xctr(aes)",
|
||||
.test = alg_test_skcipher,
|
||||
.suite = {
|
||||
.cipher = __VECS(aes_xctr_tv_template)
|
||||
}
|
||||
}, {
|
||||
.alg = "xts(aes)",
|
||||
.generic_driver = "xts(ecb(aes-generic))",
|
||||
|
crypto/testmgr.h | 1745 (diff suppressed because it is too large)
crypto/xctr.c | 191 (new file)
@@ -0,0 +1,191 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * XCTR: XOR Counter mode - Adapted from ctr.c
 *
 * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
 * Copyright 2021 Google LLC
 */

/*
 * XCTR mode is a blockcipher mode of operation used to implement HCTR2. XCTR is
 * closely related to the CTR mode of operation; the main difference is that CTR
 * generates the keystream using E(CTR + IV) whereas XCTR generates the
 * keystream using E(CTR ^ IV). This allows implementations to avoid dealing
 * with multi-limb integers (as is required in CTR mode). XCTR is also specified
 * using little-endian arithmetic which makes it slightly faster on LE machines.
 *
 * See the HCTR2 paper for more details:
 *	Length-preserving encryption with HCTR2
 *	(https://eprint.iacr.org/2021/1441.pdf)
 */
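
/*
 * Editor's note (not part of the patch): for 0-based block index i and a
 * 16-byte IV, the two constructions produce their keystream blocks as
 *
 *	CTR:	KS_i = E_K(IV + i)		(one big-endian 128-bit add)
 *	XCTR:	KS_i = E_K(IV ^ le32(i + 1))	(32-bit little-endian XOR)
 *
 * crypto_xctr_crypt_segment() and crypto_xctr_crypt_final() below do
 * exactly this: the counter is XORed into the IV before the block cipher
 * call and XORed out again afterwards.
 */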
#include <crypto/algapi.h>
|
||||
#include <crypto/internal/cipher.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
/* For now this implementation is limited to 16-byte blocks for simplicity */
|
||||
#define XCTR_BLOCKSIZE 16
|
||||
|
||||
static void crypto_xctr_crypt_final(struct skcipher_walk *walk,
|
||||
struct crypto_cipher *tfm, u32 byte_ctr)
|
||||
{
|
||||
u8 keystream[XCTR_BLOCKSIZE];
|
||||
const u8 *src = walk->src.virt.addr;
|
||||
u8 *dst = walk->dst.virt.addr;
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
__le32 ctr32 = cpu_to_le32(byte_ctr / XCTR_BLOCKSIZE + 1);
|
||||
|
||||
crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32));
|
||||
crypto_cipher_encrypt_one(tfm, keystream, walk->iv);
|
||||
crypto_xor_cpy(dst, keystream, src, nbytes);
|
||||
crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32));
|
||||
}
|
||||
|
||||
static int crypto_xctr_crypt_segment(struct skcipher_walk *walk,
|
||||
struct crypto_cipher *tfm, u32 byte_ctr)
|
||||
{
|
||||
void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
|
||||
crypto_cipher_alg(tfm)->cia_encrypt;
|
||||
const u8 *src = walk->src.virt.addr;
|
||||
u8 *dst = walk->dst.virt.addr;
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
__le32 ctr32 = cpu_to_le32(byte_ctr / XCTR_BLOCKSIZE + 1);
|
||||
|
||||
do {
|
||||
crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32));
|
||||
fn(crypto_cipher_tfm(tfm), dst, walk->iv);
|
||||
crypto_xor(dst, src, XCTR_BLOCKSIZE);
|
||||
crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32));
|
||||
|
||||
le32_add_cpu(&ctr32, 1);
|
||||
|
||||
src += XCTR_BLOCKSIZE;
|
||||
dst += XCTR_BLOCKSIZE;
|
||||
} while ((nbytes -= XCTR_BLOCKSIZE) >= XCTR_BLOCKSIZE);
|
||||
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static int crypto_xctr_crypt_inplace(struct skcipher_walk *walk,
|
||||
struct crypto_cipher *tfm, u32 byte_ctr)
|
||||
{
|
||||
void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
|
||||
crypto_cipher_alg(tfm)->cia_encrypt;
|
||||
unsigned long alignmask = crypto_cipher_alignmask(tfm);
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
u8 *data = walk->src.virt.addr;
|
||||
u8 tmp[XCTR_BLOCKSIZE + MAX_CIPHER_ALIGNMASK];
|
||||
u8 *keystream = PTR_ALIGN(tmp + 0, alignmask + 1);
|
||||
__le32 ctr32 = cpu_to_le32(byte_ctr / XCTR_BLOCKSIZE + 1);
|
||||
|
||||
do {
|
||||
crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32));
|
||||
fn(crypto_cipher_tfm(tfm), keystream, walk->iv);
|
||||
crypto_xor(data, keystream, XCTR_BLOCKSIZE);
|
||||
crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32));
|
||||
|
||||
le32_add_cpu(&ctr32, 1);
|
||||
|
||||
data += XCTR_BLOCKSIZE;
|
||||
} while ((nbytes -= XCTR_BLOCKSIZE) >= XCTR_BLOCKSIZE);
|
||||
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static int crypto_xctr_crypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct crypto_cipher *cipher = skcipher_cipher_simple(tfm);
|
||||
struct skcipher_walk walk;
|
||||
unsigned int nbytes;
|
||||
int err;
|
||||
u32 byte_ctr = 0;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
while (walk.nbytes >= XCTR_BLOCKSIZE) {
|
||||
if (walk.src.virt.addr == walk.dst.virt.addr)
|
||||
nbytes = crypto_xctr_crypt_inplace(&walk, cipher,
|
||||
byte_ctr);
|
||||
else
|
||||
nbytes = crypto_xctr_crypt_segment(&walk, cipher,
|
||||
byte_ctr);
|
||||
|
||||
byte_ctr += walk.nbytes - nbytes;
|
||||
err = skcipher_walk_done(&walk, nbytes);
|
||||
}
|
||||
|
||||
if (walk.nbytes) {
|
||||
crypto_xctr_crypt_final(&walk, cipher, byte_ctr);
|
||||
err = skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int crypto_xctr_create(struct crypto_template *tmpl, struct rtattr **tb)
|
||||
{
|
||||
struct skcipher_instance *inst;
|
||||
struct crypto_alg *alg;
|
||||
int err;
|
||||
|
||||
inst = skcipher_alloc_instance_simple(tmpl, tb);
|
||||
if (IS_ERR(inst))
|
||||
return PTR_ERR(inst);
|
||||
|
||||
alg = skcipher_ialg_simple(inst);
|
||||
|
||||
/* Block size must be 16 bytes. */
|
||||
err = -EINVAL;
|
||||
if (alg->cra_blocksize != XCTR_BLOCKSIZE)
|
||||
goto out_free_inst;
|
||||
|
||||
/* XCTR mode is a stream cipher. */
|
||||
inst->alg.base.cra_blocksize = 1;
|
||||
|
||||
/*
|
||||
* To simplify the implementation, configure the skcipher walk to only
|
||||
* give a partial block at the very end, never earlier.
|
||||
*/
|
||||
inst->alg.chunksize = alg->cra_blocksize;
|
||||
|
||||
inst->alg.encrypt = crypto_xctr_crypt;
|
||||
inst->alg.decrypt = crypto_xctr_crypt;
|
||||
|
||||
err = skcipher_register_instance(tmpl, inst);
|
||||
if (err) {
|
||||
out_free_inst:
|
||||
inst->free(inst);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static struct crypto_template crypto_xctr_tmpl = {
|
||||
.name = "xctr",
|
||||
.create = crypto_xctr_create,
|
||||
.module = THIS_MODULE,
|
||||
};
|
||||
|
||||
static int __init crypto_xctr_module_init(void)
|
||||
{
|
||||
return crypto_register_template(&crypto_xctr_tmpl);
|
||||
}
|
||||
|
||||
static void __exit crypto_xctr_module_exit(void)
|
||||
{
|
||||
crypto_unregister_template(&crypto_xctr_tmpl);
|
||||
}
|
||||
|
||||
subsys_initcall(crypto_xctr_module_init);
|
||||
module_exit(crypto_xctr_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("XCTR block cipher mode of operation");
|
||||
MODULE_ALIAS_CRYPTO("xctr");
|
||||
MODULE_IMPORT_NS(CRYPTO_INTERNAL);
|
@ -170,6 +170,7 @@ static int sun8i_ss_setup_ivs(struct skcipher_request *areq)
|
||||
while (i >= 0) {
|
||||
dma_unmap_single(ss->dev, rctx->p_iv[i], ivsize, DMA_TO_DEVICE);
|
||||
memzero_explicit(sf->iv[i], ivsize);
|
||||
i--;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
@ -528,25 +528,33 @@ static int allocate_flows(struct sun8i_ss_dev *ss)
|
||||
|
||||
ss->flows[i].biv = devm_kmalloc(ss->dev, AES_BLOCK_SIZE,
|
||||
GFP_KERNEL | GFP_DMA);
|
||||
if (!ss->flows[i].biv)
|
||||
if (!ss->flows[i].biv) {
|
||||
err = -ENOMEM;
|
||||
goto error_engine;
|
||||
}
|
||||
|
||||
for (j = 0; j < MAX_SG; j++) {
|
||||
ss->flows[i].iv[j] = devm_kmalloc(ss->dev, AES_BLOCK_SIZE,
|
||||
GFP_KERNEL | GFP_DMA);
|
||||
if (!ss->flows[i].iv[j])
|
||||
if (!ss->flows[i].iv[j]) {
|
||||
err = -ENOMEM;
|
||||
goto error_engine;
|
||||
}
|
||||
}
|
||||
|
||||
/* the padding could be up to two block. */
|
||||
ss->flows[i].pad = devm_kmalloc(ss->dev, MAX_PAD_SIZE,
|
||||
GFP_KERNEL | GFP_DMA);
|
||||
if (!ss->flows[i].pad)
|
||||
if (!ss->flows[i].pad) {
|
||||
err = -ENOMEM;
|
||||
goto error_engine;
|
||||
}
|
||||
ss->flows[i].result = devm_kmalloc(ss->dev, SHA256_DIGEST_SIZE,
|
||||
GFP_KERNEL | GFP_DMA);
|
||||
if (!ss->flows[i].result)
|
||||
if (!ss->flows[i].result) {
|
||||
err = -ENOMEM;
|
||||
goto error_engine;
|
||||
}
|
||||
|
||||
ss->flows[i].engine = crypto_engine_alloc_init(ss->dev, true);
|
||||
if (!ss->flows[i].engine) {
|
||||
|
@ -30,8 +30,8 @@ static int sun8i_ss_hashkey(struct sun8i_ss_hash_tfm_ctx *tfmctx, const u8 *key,
|
||||
int ret = 0;
|
||||
|
||||
xtfm = crypto_alloc_shash("sha1", 0, CRYPTO_ALG_NEED_FALLBACK);
|
||||
if (!xtfm)
|
||||
return -ENOMEM;
|
||||
if (IS_ERR(xtfm))
|
||||
return PTR_ERR(xtfm);
|
||||
|
||||
len = sizeof(*sdesc) + crypto_shash_descsize(xtfm);
|
||||
sdesc = kmalloc(len, GFP_KERNEL);
|
||||
@ -586,7 +586,8 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq)
|
||||
rctx->t_dst[k + 1].len = rctx->t_dst[k].len;
|
||||
}
|
||||
addr_xpad = dma_map_single(ss->dev, tfmctx->ipad, bs, DMA_TO_DEVICE);
|
||||
if (dma_mapping_error(ss->dev, addr_xpad)) {
|
||||
err = dma_mapping_error(ss->dev, addr_xpad);
|
||||
if (err) {
|
||||
dev_err(ss->dev, "Fail to create DMA mapping of ipad\n");
|
||||
goto err_dma_xpad;
|
||||
}
|
||||
@ -612,7 +613,8 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq)
|
||||
goto err_dma_result;
|
||||
}
|
||||
addr_xpad = dma_map_single(ss->dev, tfmctx->opad, bs, DMA_TO_DEVICE);
|
||||
if (dma_mapping_error(ss->dev, addr_xpad)) {
|
||||
err = dma_mapping_error(ss->dev, addr_xpad);
|
||||
if (err) {
|
||||
dev_err(ss->dev, "Fail to create DMA mapping of opad\n");
|
||||
goto err_dma_xpad;
|
||||
}
|
||||
|
@ -349,8 +349,16 @@ static int atmel_ecc_remove(struct i2c_client *client)
|
||||
|
||||
/* Return EBUSY if i2c client already allocated. */
|
||||
if (atomic_read(&i2c_priv->tfm_count)) {
|
||||
dev_err(&client->dev, "Device is busy\n");
|
||||
return -EBUSY;
|
||||
/*
|
||||
* After we return here, the memory backing the device is freed.
|
||||
* That happens no matter what the return value of this function
|
||||
* is because in the Linux device model there is no error
|
||||
* handling for unbinding a driver.
|
||||
* If there is still some action pending, it probably involves
|
||||
* accessing the freed memory.
|
||||
*/
|
||||
dev_emerg(&client->dev, "Device is busy, expect memory corruption.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
crypto_unregister_kpp(&atmel_ecdh_nist_p256);
|
||||
|
@ -366,7 +366,7 @@ struct ccp_device {
|
||||
|
||||
/* Master lists that all cmds are queued on. Because there can be
|
||||
* more than one CCP command queue that can process a cmd a separate
|
||||
* backlog list is neeeded so that the backlog completion call
|
||||
* backlog list is needed so that the backlog completion call
|
||||
* completes before the cmd is available for execution.
|
||||
*/
|
||||
spinlock_t cmd_lock ____cacheline_aligned;
|
||||
|
@ -503,7 +503,7 @@ static int __sev_platform_shutdown_locked(int *error)
|
||||
struct sev_device *sev = psp_master->sev_data;
|
||||
int ret;
|
||||
|
||||
if (sev->state == SEV_STATE_UNINIT)
|
||||
if (!sev || sev->state == SEV_STATE_UNINIT)
|
||||
return 0;
|
||||
|
||||
ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
|
||||
@ -577,6 +577,8 @@ static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp)
|
||||
struct sev_user_data_status data;
|
||||
int ret;
|
||||
|
||||
memset(&data, 0, sizeof(data));
|
||||
|
||||
ret = __sev_do_cmd_locked(SEV_CMD_PLATFORM_STATUS, &data, &argp->error);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -630,7 +632,7 @@ static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp, bool writable)
|
||||
if (input.length > SEV_FW_BLOB_MAX_SIZE)
|
||||
return -EFAULT;
|
||||
|
||||
blob = kmalloc(input.length, GFP_KERNEL);
|
||||
blob = kzalloc(input.length, GFP_KERNEL);
|
||||
if (!blob)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -854,7 +856,7 @@ static int sev_ioctl_do_get_id2(struct sev_issue_cmd *argp)
|
||||
input_address = (void __user *)input.address;
|
||||
|
||||
if (input.address && input.length) {
|
||||
id_blob = kmalloc(input.length, GFP_KERNEL);
|
||||
id_blob = kzalloc(input.length, GFP_KERNEL);
|
||||
if (!id_blob)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -973,14 +975,14 @@ static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp, bool writable)
|
||||
if (input.cert_chain_len > SEV_FW_BLOB_MAX_SIZE)
|
||||
return -EFAULT;
|
||||
|
||||
pdh_blob = kmalloc(input.pdh_cert_len, GFP_KERNEL);
|
||||
pdh_blob = kzalloc(input.pdh_cert_len, GFP_KERNEL);
|
||||
if (!pdh_blob)
|
||||
return -ENOMEM;
|
||||
|
||||
data.pdh_cert_address = __psp_pa(pdh_blob);
|
||||
data.pdh_cert_len = input.pdh_cert_len;
|
||||
|
||||
cert_blob = kmalloc(input.cert_chain_len, GFP_KERNEL);
|
||||
cert_blob = kzalloc(input.cert_chain_len, GFP_KERNEL);
|
||||
if (!cert_blob) {
|
||||
ret = -ENOMEM;
|
||||
goto e_free_pdh;
|
||||
|
@ -877,13 +877,6 @@ static void qm_pm_put_sync(struct hisi_qm *qm)
|
||||
pm_runtime_put_autosuspend(dev);
|
||||
}
|
||||
|
||||
static struct hisi_qp *qm_to_hisi_qp(struct hisi_qm *qm, struct qm_eqe *eqe)
|
||||
{
|
||||
u16 cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
|
||||
|
||||
return &qm->qp_array[cqn];
|
||||
}
|
||||
|
||||
static void qm_cq_head_update(struct hisi_qp *qp)
|
||||
{
|
||||
if (qp->qp_status.cq_head == QM_Q_DEPTH - 1) {
|
||||
@ -894,47 +887,37 @@ static void qm_cq_head_update(struct hisi_qp *qp)
|
||||
}
|
||||
}
|
||||
|
||||
static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm)
|
||||
static void qm_poll_req_cb(struct hisi_qp *qp)
|
||||
{
|
||||
if (unlikely(atomic_read(&qp->qp_status.flags) == QP_STOP))
|
||||
return;
|
||||
struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head;
|
||||
struct hisi_qm *qm = qp->qm;
|
||||
|
||||
if (qp->event_cb) {
|
||||
qp->event_cb(qp);
|
||||
return;
|
||||
}
|
||||
|
||||
if (qp->req_cb) {
|
||||
struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head;
|
||||
|
||||
while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
|
||||
dma_rmb();
|
||||
qp->req_cb(qp, qp->sqe + qm->sqe_size *
|
||||
le16_to_cpu(cqe->sq_head));
|
||||
qm_cq_head_update(qp);
|
||||
cqe = qp->cqe + qp->qp_status.cq_head;
|
||||
qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ,
|
||||
qp->qp_status.cq_head, 0);
|
||||
atomic_dec(&qp->qp_status.used);
|
||||
}
|
||||
|
||||
/* set c_flag */
|
||||
while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
|
||||
dma_rmb();
|
||||
qp->req_cb(qp, qp->sqe + qm->sqe_size *
|
||||
le16_to_cpu(cqe->sq_head));
|
||||
qm_cq_head_update(qp);
|
||||
cqe = qp->cqe + qp->qp_status.cq_head;
|
||||
qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ,
|
||||
qp->qp_status.cq_head, 1);
|
||||
qp->qp_status.cq_head, 0);
|
||||
atomic_dec(&qp->qp_status.used);
|
||||
}
|
||||
|
||||
/* set c_flag */
|
||||
qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ, qp->qp_status.cq_head, 1);
|
||||
}
|
||||
|
||||
static void qm_work_process(struct work_struct *work)
|
||||
static int qm_get_complete_eqe_num(struct hisi_qm_poll_data *poll_data)
|
||||
{
|
||||
struct hisi_qm *qm = container_of(work, struct hisi_qm, work);
|
||||
struct hisi_qm *qm = poll_data->qm;
|
||||
struct qm_eqe *eqe = qm->eqe + qm->status.eq_head;
|
||||
struct hisi_qp *qp;
|
||||
int eqe_num = 0;
|
||||
u16 cqn;
|
||||
|
||||
while (QM_EQE_PHASE(eqe) == qm->status.eqc_phase) {
|
||||
cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
|
||||
poll_data->qp_finish_id[eqe_num] = cqn;
|
||||
eqe_num++;
|
||||
qp = qm_to_hisi_qp(qm, eqe);
|
||||
qm_poll_qp(qp, qm);
|
||||
|
||||
if (qm->status.eq_head == QM_EQ_DEPTH - 1) {
|
||||
qm->status.eqc_phase = !qm->status.eqc_phase;
|
||||
@ -945,37 +928,70 @@ static void qm_work_process(struct work_struct *work)
|
||||
qm->status.eq_head++;
|
||||
}
|
||||
|
||||
if (eqe_num == QM_EQ_DEPTH / 2 - 1) {
|
||||
eqe_num = 0;
|
||||
qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
|
||||
}
|
||||
if (eqe_num == (QM_EQ_DEPTH >> 1) - 1)
|
||||
break;
|
||||
}
|
||||
|
||||
qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
|
||||
|
||||
return eqe_num;
|
||||
}
|
||||
|
||||
static irqreturn_t do_qm_irq(int irq, void *data)
|
||||
static void qm_work_process(struct work_struct *work)
|
||||
{
|
||||
struct hisi_qm *qm = (struct hisi_qm *)data;
|
||||
struct hisi_qm_poll_data *poll_data =
|
||||
container_of(work, struct hisi_qm_poll_data, work);
|
||||
struct hisi_qm *qm = poll_data->qm;
|
||||
struct hisi_qp *qp;
|
||||
int eqe_num, i;
|
||||
|
||||
/* the workqueue created by device driver of QM */
|
||||
if (qm->wq)
|
||||
queue_work(qm->wq, &qm->work);
|
||||
else
|
||||
schedule_work(&qm->work);
|
||||
/* Get qp id of completed tasks and re-enable the interrupt. */
|
||||
eqe_num = qm_get_complete_eqe_num(poll_data);
|
||||
for (i = eqe_num - 1; i >= 0; i--) {
|
||||
qp = &qm->qp_array[poll_data->qp_finish_id[i]];
|
||||
if (unlikely(atomic_read(&qp->qp_status.flags) == QP_STOP))
|
||||
continue;
|
||||
|
||||
return IRQ_HANDLED;
|
||||
if (qp->event_cb) {
|
||||
qp->event_cb(qp);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (likely(qp->req_cb))
|
||||
qm_poll_req_cb(qp);
|
||||
}
|
||||
}
|
||||
|
||||
static bool do_qm_irq(struct hisi_qm *qm)
|
||||
{
|
||||
struct qm_eqe *eqe = qm->eqe + qm->status.eq_head;
|
||||
struct hisi_qm_poll_data *poll_data;
|
||||
u16 cqn;
|
||||
|
||||
if (!readl(qm->io_base + QM_VF_EQ_INT_SOURCE))
|
||||
return false;
|
||||
|
||||
if (QM_EQE_PHASE(eqe) == qm->status.eqc_phase) {
|
||||
cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
|
||||
poll_data = &qm->poll_data[cqn];
|
||||
queue_work(qm->wq, &poll_data->work);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static irqreturn_t qm_irq(int irq, void *data)
|
||||
{
|
||||
struct hisi_qm *qm = data;
|
||||
bool ret;
|
||||
|
||||
if (readl(qm->io_base + QM_VF_EQ_INT_SOURCE))
|
||||
return do_qm_irq(irq, data);
|
||||
ret = do_qm_irq(qm);
|
||||
if (ret)
|
||||
return IRQ_HANDLED;
|
||||
|
||||
atomic64_inc(&qm->debug.dfx.err_irq_cnt);
|
||||
dev_err(&qm->pdev->dev, "invalid int source\n");
|
||||
qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
|
||||
|
||||
return IRQ_NONE;
|
||||
@ -3134,11 +3150,8 @@ static int qm_stop_qp_nolock(struct hisi_qp *qp)
|
||||
if (ret)
|
||||
dev_err(dev, "Failed to drain out data for stopping!\n");
|
||||
|
||||
if (qp->qm->wq)
|
||||
flush_workqueue(qp->qm->wq);
|
||||
else
|
||||
flush_work(&qp->qm->work);
|
||||
|
||||
flush_workqueue(qp->qm->wq);
|
||||
if (unlikely(qp->is_resetting && atomic_read(&qp->qp_status.used)))
|
||||
qp_stop_fail_cb(qp);
|
||||
|
||||
@ -3557,8 +3570,10 @@ static void hisi_qp_memory_uninit(struct hisi_qm *qm, int num)
|
||||
for (i = num - 1; i >= 0; i--) {
|
||||
qdma = &qm->qp_array[i].qdma;
|
||||
dma_free_coherent(dev, qdma->size, qdma->va, qdma->dma);
|
||||
kfree(qm->poll_data[i].qp_finish_id);
|
||||
}
|
||||
|
||||
kfree(qm->poll_data);
|
||||
kfree(qm->qp_array);
|
||||
}
|
||||
|
||||
@ -3567,12 +3582,18 @@ static int hisi_qp_memory_init(struct hisi_qm *qm, size_t dma_size, int id)
|
||||
struct device *dev = &qm->pdev->dev;
|
||||
size_t off = qm->sqe_size * QM_Q_DEPTH;
|
||||
struct hisi_qp *qp;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
qm->poll_data[id].qp_finish_id = kcalloc(qm->qp_num, sizeof(u16),
|
||||
GFP_KERNEL);
|
||||
if (!qm->poll_data[id].qp_finish_id)
|
||||
return -ENOMEM;
|
||||
|
||||
qp = &qm->qp_array[id];
|
||||
qp->qdma.va = dma_alloc_coherent(dev, dma_size, &qp->qdma.dma,
|
||||
GFP_KERNEL);
|
||||
if (!qp->qdma.va)
|
||||
return -ENOMEM;
|
||||
goto err_free_qp_finish_id;
|
||||
|
||||
qp->sqe = qp->qdma.va;
|
||||
qp->sqe_dma = qp->qdma.dma;
|
||||
@ -3583,6 +3604,10 @@ static int hisi_qp_memory_init(struct hisi_qm *qm, size_t dma_size, int id)
|
||||
qp->qp_id = id;
|
||||
|
||||
return 0;
|
||||
|
||||
err_free_qp_finish_id:
|
||||
kfree(qm->poll_data[id].qp_finish_id);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void hisi_qm_pre_init(struct hisi_qm *qm)
|
||||
@ -3672,6 +3697,26 @@ static void qm_last_regs_uninit(struct hisi_qm *qm)
|
||||
debug->qm_last_words = NULL;
|
||||
}
|
||||
|
||||
static void hisi_qm_unint_work(struct hisi_qm *qm)
|
||||
{
|
||||
destroy_workqueue(qm->wq);
|
||||
}
|
||||
|
||||
static void hisi_qm_memory_uninit(struct hisi_qm *qm)
|
||||
{
|
||||
struct device *dev = &qm->pdev->dev;
|
||||
|
||||
hisi_qp_memory_uninit(qm, qm->qp_num);
|
||||
if (qm->qdma.va) {
|
||||
hisi_qm_cache_wb(qm);
|
||||
dma_free_coherent(dev, qm->qdma.size,
|
||||
qm->qdma.va, qm->qdma.dma);
|
||||
}
|
||||
|
||||
idr_destroy(&qm->qp_idr);
|
||||
kfree(qm->factor);
|
||||
}
|
||||
|
||||
/**
|
||||
* hisi_qm_uninit() - Uninitialize qm.
|
||||
* @qm: The qm needed uninit.
|
||||
@ -3680,13 +3725,10 @@ static void qm_last_regs_uninit(struct hisi_qm *qm)
|
||||
*/
|
||||
void hisi_qm_uninit(struct hisi_qm *qm)
|
||||
{
|
||||
struct pci_dev *pdev = qm->pdev;
|
||||
struct device *dev = &pdev->dev;
|
||||
|
||||
qm_last_regs_uninit(qm);
|
||||
|
||||
qm_cmd_uninit(qm);
|
||||
kfree(qm->factor);
|
||||
hisi_qm_unint_work(qm);
|
||||
down_write(&qm->qps_lock);
|
||||
|
||||
if (!qm_avail_state(qm, QM_CLOSE)) {
|
||||
@ -3694,14 +3736,7 @@ void hisi_qm_uninit(struct hisi_qm *qm)
|
||||
return;
|
||||
}
|
||||
|
||||
hisi_qp_memory_uninit(qm, qm->qp_num);
|
||||
idr_destroy(&qm->qp_idr);
|
||||
|
||||
if (qm->qdma.va) {
|
||||
hisi_qm_cache_wb(qm);
|
||||
dma_free_coherent(dev, qm->qdma.size,
|
||||
qm->qdma.va, qm->qdma.dma);
|
||||
}
|
||||
hisi_qm_memory_uninit(qm);
|
||||
hisi_qm_set_state(qm, QM_NOT_READY);
|
||||
up_write(&qm->qps_lock);
|
||||
|
||||
@ -6018,14 +6053,28 @@ static int hisi_qm_pci_init(struct hisi_qm *qm)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void hisi_qm_init_work(struct hisi_qm *qm)
|
||||
static int hisi_qm_init_work(struct hisi_qm *qm)
|
||||
{
|
||||
INIT_WORK(&qm->work, qm_work_process);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < qm->qp_num; i++)
|
||||
INIT_WORK(&qm->poll_data[i].work, qm_work_process);
|
||||
|
||||
if (qm->fun_type == QM_HW_PF)
|
||||
INIT_WORK(&qm->rst_work, hisi_qm_controller_reset);
|
||||
|
||||
if (qm->ver > QM_HW_V2)
|
||||
INIT_WORK(&qm->cmd_process, qm_cmd_process);
|
||||
|
||||
qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_MEM_RECLAIM |
|
||||
WQ_UNBOUND, num_online_cpus(),
|
||||
pci_name(qm->pdev));
|
||||
if (!qm->wq) {
|
||||
pci_err(qm->pdev, "failed to alloc workqueue!\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int hisi_qp_alloc_memory(struct hisi_qm *qm)
|
||||
@ -6038,11 +6087,18 @@ static int hisi_qp_alloc_memory(struct hisi_qm *qm)
|
||||
if (!qm->qp_array)
|
||||
return -ENOMEM;
|
||||
|
||||
qm->poll_data = kcalloc(qm->qp_num, sizeof(struct hisi_qm_poll_data), GFP_KERNEL);
|
||||
if (!qm->poll_data) {
|
||||
kfree(qm->qp_array);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* one more page for device or qp statuses */
|
||||
qp_dma_size = qm->sqe_size * QM_Q_DEPTH +
|
||||
sizeof(struct qm_cqe) * QM_Q_DEPTH;
|
||||
qp_dma_size = PAGE_ALIGN(qp_dma_size) + PAGE_SIZE;
|
||||
for (i = 0; i < qm->qp_num; i++) {
|
||||
qm->poll_data[i].qm = qm;
|
||||
ret = hisi_qp_memory_init(qm, qp_dma_size, i);
|
||||
if (ret)
|
||||
goto err_init_qp_mem;
|
||||
@ -6176,7 +6232,10 @@ int hisi_qm_init(struct hisi_qm *qm)
|
||||
if (ret)
|
||||
goto err_alloc_uacce;
|
||||
|
||||
hisi_qm_init_work(qm);
|
||||
ret = hisi_qm_init_work(qm);
|
||||
if (ret)
|
||||
goto err_free_qm_memory;
|
||||
|
||||
qm_cmd_init(qm);
|
||||
atomic_set(&qm->status.flags, QM_INIT);
|
||||
|
||||
@ -6184,6 +6243,8 @@ int hisi_qm_init(struct hisi_qm *qm)
|
||||
|
||||
return 0;
|
||||
|
||||
err_free_qm_memory:
|
||||
hisi_qm_memory_uninit(qm);
|
||||
err_alloc_uacce:
|
||||
if (qm->use_sva) {
|
||||
uacce_remove(qm->uacce);
|
||||
|
@ -143,10 +143,10 @@ struct sec_ctx {
|
||||
/* Threshold for fake busy, trigger to return -EBUSY to user */
|
||||
u32 fake_req_limit;
|
||||
|
||||
/* Currrent cyclic index to select a queue for encipher */
|
||||
/* Current cyclic index to select a queue for encipher */
|
||||
atomic_t enc_qcyclic;
|
||||
|
||||
/* Currrent cyclic index to select a queue for decipher */
|
||||
/* Current cyclic index to select a queue for decipher */
|
||||
atomic_t dec_qcyclic;
|
||||
|
||||
enum sec_alg_type alg_type;
|
||||
|
@ -508,16 +508,17 @@ static int sec_engine_init(struct hisi_qm *qm)
|
||||
|
||||
writel(SEC_SAA_ENABLE, qm->io_base + SEC_SAA_EN_REG);
|
||||
|
||||
/* HW V2 enable sm4 extra mode, as ctr/ecb */
|
||||
if (qm->ver < QM_HW_V3)
|
||||
if (qm->ver < QM_HW_V3) {
|
||||
/* HW V2 enable sm4 extra mode, as ctr/ecb */
|
||||
writel_relaxed(SEC_BD_ERR_CHK_EN0,
|
||||
qm->io_base + SEC_BD_ERR_CHK_EN_REG0);
|
||||
|
||||
/* Enable sm4 xts mode multiple iv */
|
||||
writel_relaxed(SEC_BD_ERR_CHK_EN1,
|
||||
qm->io_base + SEC_BD_ERR_CHK_EN_REG1);
|
||||
writel_relaxed(SEC_BD_ERR_CHK_EN3,
|
||||
qm->io_base + SEC_BD_ERR_CHK_EN_REG3);
|
||||
/* HW V2 enable sm4 xts mode multiple iv */
|
||||
writel_relaxed(SEC_BD_ERR_CHK_EN1,
|
||||
qm->io_base + SEC_BD_ERR_CHK_EN_REG1);
|
||||
writel_relaxed(SEC_BD_ERR_CHK_EN3,
|
||||
qm->io_base + SEC_BD_ERR_CHK_EN_REG3);
|
||||
}
|
||||
|
||||
/* config endian */
|
||||
sec_set_endian(qm);
|
||||
@ -1002,8 +1003,6 @@ static int sec_pf_probe_init(struct sec_dev *sec)
|
||||
|
||||
static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
|
||||
{
|
||||
int ret;
|
||||
|
||||
qm->pdev = pdev;
|
||||
qm->ver = pdev->revision;
|
||||
qm->algs = "cipher\ndigest\naead";
|
||||
@ -1029,25 +1028,7 @@ static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
|
||||
qm->qp_num = SEC_QUEUE_NUM_V1 - SEC_PF_DEF_Q_NUM;
|
||||
}
|
||||
|
||||
/*
|
||||
* WQ_HIGHPRI: SEC request must be low delayed,
|
||||
* so need a high priority workqueue.
|
||||
* WQ_UNBOUND: SEC task is likely with long
|
||||
* running CPU intensive workloads.
|
||||
*/
|
||||
qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_MEM_RECLAIM |
|
||||
WQ_UNBOUND, num_online_cpus(),
|
||||
pci_name(qm->pdev));
|
||||
if (!qm->wq) {
|
||||
pci_err(qm->pdev, "fail to alloc workqueue\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
ret = hisi_qm_init(qm);
|
||||
if (ret)
|
||||
destroy_workqueue(qm->wq);
|
||||
|
||||
return ret;
|
||||
return hisi_qm_init(qm);
|
||||
}
|
||||
|
||||
static void sec_qm_uninit(struct hisi_qm *qm)
|
||||
@ -1078,8 +1059,6 @@ static int sec_probe_init(struct sec_dev *sec)
|
||||
static void sec_probe_uninit(struct hisi_qm *qm)
|
||||
{
|
||||
hisi_qm_dev_err_uninit(qm);
|
||||
|
||||
destroy_workqueue(qm->wq);
|
||||
}
|
||||
|
||||
static void sec_iommu_used_check(struct sec_dev *sec)
|
||||
|
@ -185,7 +185,7 @@ static int hisi_trng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
|
||||
struct hisi_trng *trng;
|
||||
int currsize = 0;
|
||||
u32 val = 0;
|
||||
u32 ret;
|
||||
int ret;
|
||||
|
||||
trng = container_of(rng, struct hisi_trng, rng);
|
||||
|
||||
|
@ -990,8 +990,6 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip)
|
||||
|
||||
static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
|
||||
{
|
||||
int ret;
|
||||
|
||||
qm->pdev = pdev;
|
||||
qm->ver = pdev->revision;
|
||||
if (pdev->revision >= QM_HW_V3)
|
||||
@ -1021,25 +1019,12 @@ static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
|
||||
qm->qp_num = HZIP_QUEUE_NUM_V1 - HZIP_PF_DEF_Q_NUM;
|
||||
}
|
||||
|
||||
qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_MEM_RECLAIM |
|
||||
WQ_UNBOUND, num_online_cpus(),
|
||||
pci_name(qm->pdev));
|
||||
if (!qm->wq) {
|
||||
pci_err(qm->pdev, "fail to alloc workqueue\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
ret = hisi_qm_init(qm);
|
||||
if (ret)
|
||||
destroy_workqueue(qm->wq);
|
||||
|
||||
return ret;
|
||||
return hisi_qm_init(qm);
|
||||
}
|
||||
|
||||
static void hisi_zip_qm_uninit(struct hisi_qm *qm)
|
||||
{
|
||||
hisi_qm_uninit(qm);
|
||||
destroy_workqueue(qm->wq);
|
||||
}
|
||||
|
||||
static int hisi_zip_probe_init(struct hisi_zip *hisi_zip)
|
||||
|
@ -51,11 +51,47 @@ static const struct devlink_param otx2_cpt_dl_params[] = {
|
||||
NULL),
|
||||
};
|
||||
|
||||
static int otx2_cpt_devlink_info_get(struct devlink *devlink,
|
||||
static int otx2_cpt_dl_info_firmware_version_put(struct devlink_info_req *req,
|
||||
struct otx2_cpt_eng_grp_info grp[],
|
||||
const char *ver_name, int eng_type)
|
||||
{
|
||||
struct otx2_cpt_engs_rsvd *eng;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < OTX2_CPT_MAX_ENGINE_GROUPS; i++) {
|
||||
eng = find_engines_by_type(&grp[i], eng_type);
|
||||
if (eng)
|
||||
return devlink_info_version_running_put(req, ver_name,
|
||||
eng->ucode->ver_str);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int otx2_cpt_devlink_info_get(struct devlink *dl,
|
||||
struct devlink_info_req *req,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
return devlink_info_driver_name_put(req, "rvu_cptpf");
|
||||
struct otx2_cpt_devlink *cpt_dl = devlink_priv(dl);
|
||||
struct otx2_cptpf_dev *cptpf = cpt_dl->cptpf;
|
||||
int err;
|
||||
|
||||
err = devlink_info_driver_name_put(req, "rvu_cptpf");
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = otx2_cpt_dl_info_firmware_version_put(req, cptpf->eng_grps.grp,
|
||||
"fw.ae", OTX2_CPT_AE_TYPES);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = otx2_cpt_dl_info_firmware_version_put(req, cptpf->eng_grps.grp,
|
||||
"fw.se", OTX2_CPT_SE_TYPES);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return otx2_cpt_dl_info_firmware_version_put(req, cptpf->eng_grps.grp,
|
||||
"fw.ie", OTX2_CPT_IE_TYPES);
|
||||
}
|
||||
|
||||
static const struct devlink_ops otx2_cpt_devlink_ops = {
|
||||
|
@ -476,7 +476,7 @@ static int cpt_ucode_load_fw(struct pci_dev *pdev, struct fw_info_t *fw_info)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct otx2_cpt_engs_rsvd *find_engines_by_type(
|
||||
struct otx2_cpt_engs_rsvd *find_engines_by_type(
|
||||
struct otx2_cpt_eng_grp_info *eng_grp,
|
||||
int eng_type)
|
||||
{
|
||||
@ -1605,7 +1605,10 @@ int otx2_cpt_dl_custom_egrp_create(struct otx2_cptpf_dev *cptpf,
|
||||
if (!strncasecmp(val, "se", 2) && strchr(val, ':')) {
|
||||
if (has_se || ucode_idx)
|
||||
goto err_print;
|
||||
tmp = strim(strsep(&val, ":"));
|
||||
tmp = strsep(&val, ":");
|
||||
if (!tmp)
|
||||
goto err_print;
|
||||
tmp = strim(tmp);
|
||||
if (!val)
|
||||
goto err_print;
|
||||
if (strlen(tmp) != 2)
|
||||
@ -1617,7 +1620,10 @@ int otx2_cpt_dl_custom_egrp_create(struct otx2_cptpf_dev *cptpf,
|
||||
} else if (!strncasecmp(val, "ae", 2) && strchr(val, ':')) {
|
||||
if (has_ae || ucode_idx)
|
||||
goto err_print;
|
||||
tmp = strim(strsep(&val, ":"));
|
||||
tmp = strsep(&val, ":");
|
||||
if (!tmp)
|
||||
goto err_print;
|
||||
tmp = strim(tmp);
|
||||
if (!val)
|
||||
goto err_print;
|
||||
if (strlen(tmp) != 2)
|
||||
@ -1629,7 +1635,10 @@ int otx2_cpt_dl_custom_egrp_create(struct otx2_cptpf_dev *cptpf,
|
||||
} else if (!strncasecmp(val, "ie", 2) && strchr(val, ':')) {
|
||||
if (has_ie || ucode_idx)
|
||||
goto err_print;
|
||||
tmp = strim(strsep(&val, ":"));
|
||||
tmp = strsep(&val, ":");
|
||||
if (!tmp)
|
||||
goto err_print;
|
||||
tmp = strim(tmp);
|
||||
if (!val)
|
||||
goto err_print;
|
||||
if (strlen(tmp) != 2)
|
||||
|
@ -166,4 +166,7 @@ int otx2_cpt_dl_custom_egrp_create(struct otx2_cptpf_dev *cptpf,
|
||||
int otx2_cpt_dl_custom_egrp_delete(struct otx2_cptpf_dev *cptpf,
|
||||
struct devlink_param_gset_ctx *ctx);
|
||||
void otx2_cpt_print_uc_dbg_info(struct otx2_cptpf_dev *cptpf);
|
||||
struct otx2_cpt_engs_rsvd *find_engines_by_type(
|
||||
struct otx2_cpt_eng_grp_info *eng_grp,
|
||||
int eng_type);
|
||||
#endif /* __OTX2_CPTPF_UCODE_H */
|
||||
|
@ -17,7 +17,7 @@ config CRYPTO_DEV_QAT
|
||||
|
||||
config CRYPTO_DEV_QAT_DH895xCC
|
||||
tristate "Support for Intel(R) DH895xCC"
|
||||
depends on X86 && PCI
|
||||
depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
|
||||
select CRYPTO_DEV_QAT
|
||||
help
|
||||
Support for Intel(R) DH895xcc with Intel(R) QuickAssist Technology
|
||||
@ -28,7 +28,7 @@ config CRYPTO_DEV_QAT_DH895xCC
|
||||
|
||||
config CRYPTO_DEV_QAT_C3XXX
|
||||
tristate "Support for Intel(R) C3XXX"
|
||||
depends on X86 && PCI
|
||||
depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
|
||||
select CRYPTO_DEV_QAT
|
||||
help
|
||||
Support for Intel(R) C3xxx with Intel(R) QuickAssist Technology
|
||||
@ -39,7 +39,7 @@ config CRYPTO_DEV_QAT_C3XXX
|
||||
|
||||
config CRYPTO_DEV_QAT_C62X
|
||||
tristate "Support for Intel(R) C62X"
|
||||
depends on X86 && PCI
|
||||
depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
|
||||
select CRYPTO_DEV_QAT
|
||||
help
|
||||
Support for Intel(R) C62x with Intel(R) QuickAssist Technology
|
||||
@ -50,7 +50,7 @@ config CRYPTO_DEV_QAT_C62X
|
||||
|
||||
config CRYPTO_DEV_QAT_4XXX
|
||||
tristate "Support for Intel(R) QAT_4XXX"
|
||||
depends on X86 && PCI
|
||||
depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
|
||||
select CRYPTO_DEV_QAT
|
||||
help
|
||||
Support for Intel(R) QuickAssist Technology QAT_4xxx
|
||||
@ -61,7 +61,7 @@ config CRYPTO_DEV_QAT_4XXX
|
||||
|
||||
config CRYPTO_DEV_QAT_DH895xCCVF
|
||||
tristate "Support for Intel(R) DH895xCC Virtual Function"
|
||||
depends on X86 && PCI
|
||||
depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
|
||||
select PCI_IOV
|
||||
select CRYPTO_DEV_QAT
|
||||
|
||||
@ -74,7 +74,7 @@ config CRYPTO_DEV_QAT_DH895xCCVF
|
||||
|
||||
config CRYPTO_DEV_QAT_C3XXXVF
|
||||
tristate "Support for Intel(R) C3XXX Virtual Function"
|
||||
depends on X86 && PCI
|
||||
depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
|
||||
select PCI_IOV
|
||||
select CRYPTO_DEV_QAT
|
||||
help
|
||||
@ -86,7 +86,7 @@ config CRYPTO_DEV_QAT_C3XXXVF
|
||||
|
||||
config CRYPTO_DEV_QAT_C62XVF
|
||||
tristate "Support for Intel(R) C62X Virtual Function"
|
||||
depends on X86 && PCI
|
||||
depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
|
||||
select PCI_IOV
|
||||
select CRYPTO_DEV_QAT
|
||||
help
|
||||
|
@ -49,11 +49,6 @@ struct service_hndl {
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
static inline int get_current_node(void)
|
||||
{
|
||||
return topology_physical_package_id(raw_smp_processor_id());
|
||||
}
|
||||
|
||||
int adf_service_register(struct service_hndl *service);
|
||||
int adf_service_unregister(struct service_hndl *service);
|
||||
|
||||
|
@ -605,7 +605,7 @@ static int qat_alg_aead_newkey(struct crypto_aead *tfm, const u8 *key,
|
||||
{
|
||||
struct qat_alg_aead_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct qat_crypto_instance *inst = NULL;
|
||||
int node = get_current_node();
|
||||
int node = numa_node_id();
|
||||
struct device *dev;
|
||||
int ret;
|
||||
|
||||
@ -1065,7 +1065,7 @@ static int qat_alg_skcipher_newkey(struct qat_alg_skcipher_ctx *ctx,
|
||||
{
|
||||
struct qat_crypto_instance *inst = NULL;
|
||||
struct device *dev;
|
||||
int node = get_current_node();
|
||||
int node = numa_node_id();
|
||||
int ret;
|
||||
|
||||
inst = qat_crypto_get_instance_node(node);
|
||||
|
@ -489,7 +489,7 @@ static int qat_dh_init_tfm(struct crypto_kpp *tfm)
|
||||
{
|
||||
struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm);
|
||||
struct qat_crypto_instance *inst =
|
||||
qat_crypto_get_instance_node(get_current_node());
|
||||
qat_crypto_get_instance_node(numa_node_id());
|
||||
|
||||
if (!inst)
|
||||
return -EINVAL;
|
||||
@ -1225,7 +1225,7 @@ static int qat_rsa_init_tfm(struct crypto_akcipher *tfm)
|
||||
{
|
||||
struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
|
||||
struct qat_crypto_instance *inst =
|
||||
qat_crypto_get_instance_node(get_current_node());
|
||||
qat_crypto_get_instance_node(numa_node_id());
|
||||
|
||||
if (!inst)
|
||||
return -EINVAL;
|
||||
|
@ -31,7 +31,7 @@
|
||||
#define FSCRYPT_CONTEXT_V2 2
|
||||
|
||||
/* Keep this in sync with include/uapi/linux/fscrypt.h */
|
||||
#define FSCRYPT_MODE_MAX FSCRYPT_MODE_ADIANTUM
|
||||
#define FSCRYPT_MODE_MAX FSCRYPT_MODE_AES_256_HCTR2
|
||||
|
||||
struct fscrypt_context_v1 {
|
||||
u8 version; /* FSCRYPT_CONTEXT_V1 */
|
||||
|
@ -53,6 +53,13 @@ struct fscrypt_mode fscrypt_modes[] = {
|
||||
.ivsize = 32,
|
||||
.blk_crypto_mode = BLK_ENCRYPTION_MODE_ADIANTUM,
|
||||
},
|
||||
[FSCRYPT_MODE_AES_256_HCTR2] = {
|
||||
.friendly_name = "AES-256-HCTR2",
|
||||
.cipher_str = "hctr2(aes)",
|
||||
.keysize = 32,
|
||||
.security_strength = 32,
|
||||
.ivsize = 32,
|
||||
},
|
||||
};
|
||||
|
||||
static DEFINE_MUTEX(fscrypt_mode_key_setup_mutex);
|
||||
|
@ -61,7 +61,7 @@ fscrypt_get_dummy_policy(struct super_block *sb)
|
||||
return sb->s_cop->get_dummy_policy(sb);
|
||||
}
|
||||
|
||||
static bool fscrypt_valid_enc_modes(u32 contents_mode, u32 filenames_mode)
|
||||
static bool fscrypt_valid_enc_modes_v1(u32 contents_mode, u32 filenames_mode)
|
||||
{
|
||||
if (contents_mode == FSCRYPT_MODE_AES_256_XTS &&
|
||||
filenames_mode == FSCRYPT_MODE_AES_256_CTS)
|
||||
@ -78,6 +78,14 @@ static bool fscrypt_valid_enc_modes(u32 contents_mode, u32 filenames_mode)
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool fscrypt_valid_enc_modes_v2(u32 contents_mode, u32 filenames_mode)
|
||||
{
|
||||
if (contents_mode == FSCRYPT_MODE_AES_256_XTS &&
|
||||
filenames_mode == FSCRYPT_MODE_AES_256_HCTR2)
|
||||
return true;
|
||||
return fscrypt_valid_enc_modes_v1(contents_mode, filenames_mode);
|
||||
}
|
||||
|
||||
static bool supported_direct_key_modes(const struct inode *inode,
|
||||
u32 contents_mode, u32 filenames_mode)
|
||||
{
|
||||
@ -151,7 +159,7 @@ static bool supported_iv_ino_lblk_policy(const struct fscrypt_policy_v2 *policy,
|
||||
static bool fscrypt_supported_v1_policy(const struct fscrypt_policy_v1 *policy,
|
||||
const struct inode *inode)
|
||||
{
|
||||
if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode,
|
||||
if (!fscrypt_valid_enc_modes_v1(policy->contents_encryption_mode,
|
||||
policy->filenames_encryption_mode)) {
|
||||
fscrypt_warn(inode,
|
||||
"Unsupported encryption modes (contents %d, filenames %d)",
|
||||
@ -187,7 +195,7 @@ static bool fscrypt_supported_v2_policy(const struct fscrypt_policy_v2 *policy,
|
||||
{
|
||||
int count = 0;
|
||||
|
||||
if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode,
|
||||
if (!fscrypt_valid_enc_modes_v2(policy->contents_encryption_mode,
|
||||
policy->filenames_encryption_mode)) {
|
||||
fscrypt_warn(inode,
|
||||
"Unsupported encryption modes (contents %d, filenames %d)",
|
||||
|
include/crypto/internal/blake2s.h

@@ -8,7 +8,6 @@
 #define _CRYPTO_INTERNAL_BLAKE2S_H
 
 #include <crypto/blake2s.h>
-#include <crypto/internal/hash.h>
 #include <linux/string.h>
 
 void blake2s_compress_generic(struct blake2s_state *state, const u8 *block,
@@ -19,111 +18,4 @@ void blake2s_compress(struct blake2s_state *state, const u8 *block,
 
 bool blake2s_selftest(void);
 
-static inline void blake2s_set_lastblock(struct blake2s_state *state)
-{
-        state->f[0] = -1;
-}
-
-/* Helper functions for BLAKE2s shared by the library and shash APIs */
-
-static __always_inline void
-__blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen,
-                 bool force_generic)
-{
-        const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
-
-        if (unlikely(!inlen))
-                return;
-        if (inlen > fill) {
-                memcpy(state->buf + state->buflen, in, fill);
-                if (force_generic)
-                        blake2s_compress_generic(state, state->buf, 1,
-                                                 BLAKE2S_BLOCK_SIZE);
-                else
-                        blake2s_compress(state, state->buf, 1,
-                                         BLAKE2S_BLOCK_SIZE);
-                state->buflen = 0;
-                in += fill;
-                inlen -= fill;
-        }
-        if (inlen > BLAKE2S_BLOCK_SIZE) {
-                const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
-                /* Hash one less (full) block than strictly possible */
-                if (force_generic)
-                        blake2s_compress_generic(state, in, nblocks - 1,
-                                                 BLAKE2S_BLOCK_SIZE);
-                else
-                        blake2s_compress(state, in, nblocks - 1,
-                                         BLAKE2S_BLOCK_SIZE);
-                in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
-                inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
-        }
-        memcpy(state->buf + state->buflen, in, inlen);
-        state->buflen += inlen;
-}
-
-static __always_inline void
-__blake2s_final(struct blake2s_state *state, u8 *out, bool force_generic)
-{
-        blake2s_set_lastblock(state);
-        memset(state->buf + state->buflen, 0,
-               BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
-        if (force_generic)
-                blake2s_compress_generic(state, state->buf, 1, state->buflen);
-        else
-                blake2s_compress(state, state->buf, 1, state->buflen);
-        cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
-        memcpy(out, state->h, state->outlen);
-}
-
-/* Helper functions for shash implementations of BLAKE2s */
-
-struct blake2s_tfm_ctx {
-        u8 key[BLAKE2S_KEY_SIZE];
-        unsigned int keylen;
-};
-
-static inline int crypto_blake2s_setkey(struct crypto_shash *tfm,
-                                        const u8 *key, unsigned int keylen)
-{
-        struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm);
-
-        if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE)
-                return -EINVAL;
-
-        memcpy(tctx->key, key, keylen);
-        tctx->keylen = keylen;
-
-        return 0;
-}
-
-static inline int crypto_blake2s_init(struct shash_desc *desc)
-{
-        const struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
-        struct blake2s_state *state = shash_desc_ctx(desc);
-        unsigned int outlen = crypto_shash_digestsize(desc->tfm);
-
-        __blake2s_init(state, outlen, tctx->key, tctx->keylen);
-        return 0;
-}
-
-static inline int crypto_blake2s_update(struct shash_desc *desc,
-                                        const u8 *in, unsigned int inlen,
-                                        bool force_generic)
-{
-        struct blake2s_state *state = shash_desc_ctx(desc);
-
-        __blake2s_update(state, in, inlen, force_generic);
-        return 0;
-}
-
-static inline int crypto_blake2s_final(struct shash_desc *desc, u8 *out,
-                                       bool force_generic)
-{
-        struct blake2s_state *state = shash_desc_ctx(desc);
-
-        __blake2s_final(state, out, force_generic);
-        return 0;
-}
-
 #endif /* _CRYPTO_INTERNAL_BLAKE2S_H */
22	include/crypto/polyval.h	Normal file

@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common values for the Polyval hash algorithm
+ *
+ * Copyright 2021 Google LLC
+ */
+
+#ifndef _CRYPTO_POLYVAL_H
+#define _CRYPTO_POLYVAL_H
+
+#include <linux/types.h>
+#include <linux/crypto.h>
+
+#define POLYVAL_BLOCK_SIZE	16
+#define POLYVAL_DIGEST_SIZE	16
+
+void polyval_mul_non4k(u8 *op1, const u8 *op2);
+
+void polyval_update_non4k(const u8 *key, const u8 *in,
+			  size_t nblocks, u8 *accumulator);
+
+#endif
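A minimal usage sketch for the two helpers declared above (not part of this
commit): it assumes the message has already been padded to whole 16-byte
blocks and that the key is already in whatever internal representation the
generic POLYVAL code expects; the wrapper function name is invented for
illustration.

#include <linux/string.h>
#include <crypto/polyval.h>

static void polyval_digest_sketch(const u8 key[POLYVAL_BLOCK_SIZE],
                                  const u8 *msg, size_t nblocks,
                                  u8 digest[POLYVAL_DIGEST_SIZE])
{
        u8 accumulator[POLYVAL_BLOCK_SIZE];

        /* POLYVAL accumulates into a running 16-byte state, starting at zero. */
        memset(accumulator, 0, sizeof(accumulator));
        polyval_update_non4k(key, msg, nblocks, accumulator);
        memcpy(digest, accumulator, POLYVAL_DIGEST_SIZE);
}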
@@ -265,6 +265,12 @@ struct hisi_qm_list {
 	void (*unregister_from_crypto)(struct hisi_qm *qm);
 };
 
+struct hisi_qm_poll_data {
+	struct hisi_qm *qm;
+	struct work_struct work;
+	u16 *qp_finish_id;
+};
+
 struct hisi_qm {
 	enum qm_hw_ver ver;
 	enum qm_fun_type fun_type;
@@ -302,6 +308,7 @@ struct hisi_qm {
 	struct rw_semaphore qps_lock;
 	struct idr qp_idr;
 	struct hisi_qp *qp_array;
+	struct hisi_qm_poll_data *poll_data;
 
 	struct mutex mailbox_lock;
 
@@ -312,7 +319,6 @@ struct hisi_qm {
 	u32 error_mask;
 
 	struct workqueue_struct *wq;
-	struct work_struct work;
 	struct work_struct rst_work;
 	struct work_struct cmd_process;
 
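The new hisi_qm_poll_data ties a per-queue work item to its owning QM. A
hypothetical work handler (not taken from the driver; the function name and
the description of qp_finish_id in the comment are assumptions) would recover
its context with container_of():

static void qm_poll_work_sketch(struct work_struct *work)
{
        struct hisi_qm_poll_data *poll_data =
                container_of(work, struct hisi_qm_poll_data, work);

        /*
         * A real handler would use poll_data->qm and walk
         * poll_data->qp_finish_id to reap the queue pairs whose requests
         * have completed; that part is omitted in this sketch.
         */
        pr_debug("poll work for qm %p\n", poll_data->qm);
}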
include/uapi/linux/fscrypt.h

@@ -27,7 +27,8 @@
 #define FSCRYPT_MODE_AES_128_CBC		5
 #define FSCRYPT_MODE_AES_128_CTS		6
 #define FSCRYPT_MODE_ADIANTUM			9
-/* If adding a mode number > 9, update FSCRYPT_MODE_MAX in fscrypt_private.h */
+#define FSCRYPT_MODE_AES_256_HCTR2		10
+/* If adding a mode number > 10, update FSCRYPT_MODE_MAX in fscrypt_private.h */
 
 /*
  * Legacy policy version; ad-hoc KDF and no key verification.
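For context, a userspace sketch of how the new mode number would be selected
(not part of this diff): it sets AES-256-XTS for contents and AES-256-HCTR2
for filenames on an empty directory via FS_IOC_SET_ENCRYPTION_POLICY. The
helper name is invented, error handling is omitted, and the key identifier is
assumed to come from a prior FS_IOC_ADD_ENCRYPTION_KEY call.

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/fscrypt.h>

static int set_hctr2_policy(int dirfd,
                            const __u8 key_id[FSCRYPT_KEY_IDENTIFIER_SIZE])
{
        struct fscrypt_policy_v2 policy = {
                .version = FSCRYPT_POLICY_V2,
                .contents_encryption_mode = FSCRYPT_MODE_AES_256_XTS,
                .filenames_encryption_mode = FSCRYPT_MODE_AES_256_HCTR2,
                .flags = FSCRYPT_POLICY_FLAGS_PAD_32,
        };

        memcpy(policy.master_key_identifier, key_id,
               FSCRYPT_KEY_IDENTIFIER_SIZE);
        return ioctl(dirfd, FS_IOC_SET_ENCRYPTION_POLICY, &policy);
}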
lib/crypto/blake2s-selftest.c

@@ -4,6 +4,8 @@
  */
 
 #include <crypto/internal/blake2s.h>
+#include <linux/kernel.h>
+#include <linux/random.h>
 #include <linux/string.h>
 
 /*
@@ -587,5 +589,44 @@ bool __init blake2s_selftest(void)
 		}
 	}
 
+	for (i = 0; i < 32; ++i) {
+		enum { TEST_ALIGNMENT = 16 };
+		u8 unaligned_block[BLAKE2S_BLOCK_SIZE + TEST_ALIGNMENT - 1]
+					__aligned(TEST_ALIGNMENT);
+		u8 blocks[BLAKE2S_BLOCK_SIZE * 3];
+		struct blake2s_state state1, state2;
+
+		get_random_bytes(blocks, sizeof(blocks));
+		get_random_bytes(&state, sizeof(state));
+
+#if defined(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) && \
+	defined(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S)
+		memcpy(&state1, &state, sizeof(state1));
+		memcpy(&state2, &state, sizeof(state2));
+		blake2s_compress(&state1, blocks, 3, BLAKE2S_BLOCK_SIZE);
+		blake2s_compress_generic(&state2, blocks, 3, BLAKE2S_BLOCK_SIZE);
+		if (memcmp(&state1, &state2, sizeof(state1))) {
+			pr_err("blake2s random compress self-test %d: FAIL\n",
+			       i + 1);
+			success = false;
+		}
+#endif
+
+		memcpy(&state1, &state, sizeof(state1));
+		blake2s_compress(&state1, blocks, 1, BLAKE2S_BLOCK_SIZE);
+		for (l = 1; l < TEST_ALIGNMENT; ++l) {
+			memcpy(unaligned_block + l, blocks,
+			       BLAKE2S_BLOCK_SIZE);
+			memcpy(&state2, &state, sizeof(state2));
+			blake2s_compress(&state2, unaligned_block + l, 1,
+					 BLAKE2S_BLOCK_SIZE);
+			if (memcmp(&state1, &state2, sizeof(state1))) {
+				pr_err("blake2s random compress align %d self-test %d: FAIL\n",
+				       l, i + 1);
+				success = false;
+			}
+		}
+	}
+
 	return success;
 }
lib/crypto/blake2s.c

@@ -16,16 +16,44 @@
 #include <linux/init.h>
 #include <linux/bug.h>
 
+static inline void blake2s_set_lastblock(struct blake2s_state *state)
+{
+	state->f[0] = -1;
+}
+
 void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)
 {
-	__blake2s_update(state, in, inlen, false);
+	const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
+
+	if (unlikely(!inlen))
+		return;
+	if (inlen > fill) {
+		memcpy(state->buf + state->buflen, in, fill);
+		blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
+		state->buflen = 0;
+		in += fill;
+		inlen -= fill;
+	}
+	if (inlen > BLAKE2S_BLOCK_SIZE) {
+		const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
+		blake2s_compress(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
+		in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+		inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+	}
+	memcpy(state->buf + state->buflen, in, inlen);
+	state->buflen += inlen;
 }
 EXPORT_SYMBOL(blake2s_update);
 
 void blake2s_final(struct blake2s_state *state, u8 *out)
 {
 	WARN_ON(IS_ENABLED(DEBUG) && !out);
-	__blake2s_final(state, out, false);
+	blake2s_set_lastblock(state);
+	memset(state->buf + state->buflen, 0,
+	       BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
+	blake2s_compress(state, state->buf, 1, state->buflen);
+	cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
+	memcpy(out, state->h, state->outlen);
 	memzero_explicit(state, sizeof(*state));
 }
 EXPORT_SYMBOL(blake2s_final);
@@ -38,12 +66,7 @@ static int __init blake2s_mod_init(void)
 	return 0;
 }
 
-static void __exit blake2s_mod_exit(void)
-{
-}
-
 module_init(blake2s_mod_init);
-module_exit(blake2s_mod_exit);
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("BLAKE2s hash function");
 MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
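For reference, a short sketch of the library interface whose update/final
paths are reworked above (illustrative only; the wrapper function name is
invented, and the keyed blake2s_init_key() variant is not shown):

#include <crypto/blake2s.h>

static void blake2s_digest_sketch(const u8 *data, size_t len,
                                  u8 out[BLAKE2S_HASH_SIZE])
{
        struct blake2s_state state;

        blake2s_init(&state, BLAKE2S_HASH_SIZE);  /* unkeyed, 32-byte digest */
        blake2s_update(&state, data, len);        /* may be called repeatedly */
        blake2s_final(&state, out);               /* also wipes the state */
}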
lib/mpi/mpi-add.c

@@ -138,7 +138,7 @@ void mpi_sub(MPI w, MPI u, MPI v)
 	mpi_add(w, u, vv);
 	mpi_free(vv);
 }
-
+EXPORT_SYMBOL_GPL(mpi_sub);
 
 void mpi_addm(MPI w, MPI u, MPI v, MPI m)
 {

lib/mpi/mpi-mul.c

@@ -82,6 +82,7 @@ void mpi_mul(MPI w, MPI u, MPI v)
 	if (tmp_limb)
 		mpi_free_limb_space(tmp_limb);
 }
+EXPORT_SYMBOL_GPL(mpi_mul);
 
 void mpi_mulm(MPI w, MPI u, MPI v, MPI m)
 {