Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git

commit ad9dd1674d
Author: Stephen Rothwell
Date:   2022-06-28 11:26:37 +10:00
57 changed files with 4771 additions and 983 deletions

View File

@ -337,6 +337,7 @@ Currently, the following pairs of encryption modes are supported:
- AES-256-XTS for contents and AES-256-CTS-CBC for filenames
- AES-128-CBC for contents and AES-128-CTS-CBC for filenames
- Adiantum for both contents and filenames
- AES-256-XTS for contents and AES-256-HCTR2 for filenames (v2 policies only)
If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair.
@ -357,6 +358,17 @@ To use Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled. Also, fast
implementations of ChaCha and NHPoly1305 should be enabled, e.g.
CONFIG_CRYPTO_CHACHA20_NEON and CONFIG_CRYPTO_NHPOLY1305_NEON for ARM.
AES-256-HCTR2 is another true wide-block encryption mode that is intended for
use on CPUs with dedicated crypto instructions. AES-256-HCTR2 has the property
that a bitflip in the plaintext changes the entire ciphertext. This property
makes it desirable for filename encryption since initialization vectors are
reused within a directory. For more details on AES-256-HCTR2, see the paper
"Length-preserving encryption with HCTR2"
(https://eprint.iacr.org/2021/1441.pdf). To use AES-256-HCTR2,
CONFIG_CRYPTO_HCTR2 must be enabled. Also, fast implementations of XCTR and
POLYVAL should be enabled, e.g. CRYPTO_POLYVAL_ARM64_CE and
CRYPTO_AES_ARM64_CE_BLK for ARM64.
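As a hedged illustration only (not part of the kernel documentation, and the UAPI constant name FSCRYPT_MODE_AES_256_HCTR2 is assumed here rather than quoted from this patch), a userspace program could select the (AES-256-XTS, AES-256-HCTR2) pair with a v2 policy roughly as follows:

/* Hedged sketch: set an XTS + HCTR2 v2 policy on a directory whose master key
 * has already been added; the HCTR2 mode constant name is an assumption. */
#include <linux/fscrypt.h>
#include <sys/ioctl.h>
#include <string.h>

static int set_hctr2_policy(int dirfd,
			    const __u8 key_id[FSCRYPT_KEY_IDENTIFIER_SIZE])
{
	struct fscrypt_policy_v2 policy;

	memset(&policy, 0, sizeof(policy));
	policy.version = FSCRYPT_POLICY_V2;
	policy.contents_encryption_mode = FSCRYPT_MODE_AES_256_XTS;
	policy.filenames_encryption_mode = FSCRYPT_MODE_AES_256_HCTR2; /* assumed name */
	memcpy(policy.master_key_identifier, key_id, FSCRYPT_KEY_IDENTIFIER_SIZE);
	return ioctl(dirfd, FS_IOC_SET_ENCRYPTION_POLICY, &policy);
}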
New encryption modes can be added relatively easily, without changes
to individual filesystems. However, authenticated encryption (AE)
modes are not currently supported because of the difficulty of dealing
@ -404,11 +416,11 @@ alternatively has the file's nonce (for `DIRECT_KEY policies`_) or
inode number (for `IV_INO_LBLK_64 policies`_) included in the IVs.
Thus, IV reuse is limited to within a single directory.
With CTS-CBC, the IV reuse means that when the plaintext filenames
share a common prefix at least as long as the cipher block size (16
bytes for AES), the corresponding encrypted filenames will also share
a common prefix. This is undesirable. Adiantum does not have this
weakness, as it is a wide-block encryption mode.
With CTS-CBC, the IV reuse means that when the plaintext filenames share a
common prefix at least as long as the cipher block size (16 bytes for AES), the
corresponding encrypted filenames will also share a common prefix. This is
undesirable. Adiantum and HCTR2 do not have this weakness, as they are
wide-block encryption modes.
All supported filenames encryption modes accept any plaintext length
>= 16 bytes; cipher block alignment is not required. However,

View File

@ -8978,15 +8978,24 @@ F: Documentation/admin-guide/perf/hisi-pcie-pmu.rst
F: Documentation/admin-guide/perf/hisi-pmu.rst
F: drivers/perf/hisilicon
HISILICON QM AND ZIP Controller DRIVER
HISILICON QM DRIVER
M: Weili Qian <qianweili@huawei.com>
M: Zhou Wang <wangzhou1@hisilicon.com>
L: linux-crypto@vger.kernel.org
S: Maintained
F: drivers/crypto/hisilicon/Kconfig
F: drivers/crypto/hisilicon/Makefile
F: drivers/crypto/hisilicon/qm.c
F: drivers/crypto/hisilicon/sgl.c
F: include/linux/hisi_acc_qm.h
HISILICON ZIP Controller DRIVER
M: Yang Shen <shenyang39@huawei.com>
M: Zhou Wang <wangzhou1@hisilicon.com>
L: linux-crypto@vger.kernel.org
S: Maintained
F: Documentation/ABI/testing/debugfs-hisi-zip
F: drivers/crypto/hisilicon/qm.c
F: drivers/crypto/hisilicon/sgl.c
F: drivers/crypto/hisilicon/zip/
F: include/linux/hisi_acc_qm.h
HISILICON ROCE DRIVER
M: Wenpeng Liang <liangwenpeng@huawei.com>

View File

@ -63,7 +63,7 @@ config CRYPTO_SHA512_ARM
using optimized ARM assembler and NEON, when available.
config CRYPTO_BLAKE2S_ARM
tristate "BLAKE2s digest algorithm (ARM)"
bool "BLAKE2s digest algorithm (ARM)"
select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
help
BLAKE2s digest algorithm optimized with ARM scalar instructions. This

View File

@ -9,8 +9,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += blake2s-arm.o
obj-$(if $(CONFIG_CRYPTO_BLAKE2S_ARM),y) += libblake2s-arm.o
obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += libblake2s-arm.o
obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o
@ -32,7 +31,6 @@ sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o
sha512-arm-y := sha512-core.o sha512-glue.o $(sha512-arm-neon-y)
blake2s-arm-y := blake2s-shash.o
libblake2s-arm-y:= blake2s-core.o blake2s-glue.o
blake2b-neon-y := blake2b-neon-core.o blake2b-neon-glue.o
sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o

View File

@ -1,75 +0,0 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* BLAKE2s digest algorithm, ARM scalar implementation
*
* Copyright 2020 Google LLC
*/
#include <crypto/internal/blake2s.h>
#include <crypto/internal/hash.h>
#include <linux/module.h>
static int crypto_blake2s_update_arm(struct shash_desc *desc,
const u8 *in, unsigned int inlen)
{
return crypto_blake2s_update(desc, in, inlen, false);
}
static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out)
{
return crypto_blake2s_final(desc, out, false);
}
#define BLAKE2S_ALG(name, driver_name, digest_size) \
{ \
.base.cra_name = name, \
.base.cra_driver_name = driver_name, \
.base.cra_priority = 200, \
.base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \
.base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \
.base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \
.base.cra_module = THIS_MODULE, \
.digestsize = digest_size, \
.setkey = crypto_blake2s_setkey, \
.init = crypto_blake2s_init, \
.update = crypto_blake2s_update_arm, \
.final = crypto_blake2s_final_arm, \
.descsize = sizeof(struct blake2s_state), \
}
static struct shash_alg blake2s_arm_algs[] = {
BLAKE2S_ALG("blake2s-128", "blake2s-128-arm", BLAKE2S_128_HASH_SIZE),
BLAKE2S_ALG("blake2s-160", "blake2s-160-arm", BLAKE2S_160_HASH_SIZE),
BLAKE2S_ALG("blake2s-224", "blake2s-224-arm", BLAKE2S_224_HASH_SIZE),
BLAKE2S_ALG("blake2s-256", "blake2s-256-arm", BLAKE2S_256_HASH_SIZE),
};
static int __init blake2s_arm_mod_init(void)
{
return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
crypto_register_shashes(blake2s_arm_algs,
ARRAY_SIZE(blake2s_arm_algs)) : 0;
}
static void __exit blake2s_arm_mod_exit(void)
{
if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
crypto_unregister_shashes(blake2s_arm_algs,
ARRAY_SIZE(blake2s_arm_algs));
}
module_init(blake2s_arm_mod_init);
module_exit(blake2s_arm_mod_exit);
MODULE_DESCRIPTION("BLAKE2s digest algorithm, ARM scalar implementation");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
MODULE_ALIAS_CRYPTO("blake2s-128");
MODULE_ALIAS_CRYPTO("blake2s-128-arm");
MODULE_ALIAS_CRYPTO("blake2s-160");
MODULE_ALIAS_CRYPTO("blake2s-160-arm");
MODULE_ALIAS_CRYPTO("blake2s-224");
MODULE_ALIAS_CRYPTO("blake2s-224-arm");
MODULE_ALIAS_CRYPTO("blake2s-256");
MODULE_ALIAS_CRYPTO("blake2s-256-arm");

View File

@ -72,6 +72,11 @@ config CRYPTO_GHASH_ARM64_CE
select CRYPTO_GF128MUL
select CRYPTO_LIB_AES
config CRYPTO_POLYVAL_ARM64_CE
tristate "POLYVAL using ARMv8 Crypto Extensions (for HCTR2)"
depends on KERNEL_MODE_NEON
select CRYPTO_POLYVAL
config CRYPTO_CRCT10DIF_ARM64_CE
tristate "CRCT10DIF digest algorithm using PMULL instructions"
depends on KERNEL_MODE_NEON && CRC_T10DIF
@ -96,13 +101,13 @@ config CRYPTO_AES_ARM64_CE_CCM
select CRYPTO_LIB_AES
config CRYPTO_AES_ARM64_CE_BLK
tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
tristate "AES in ECB/CBC/CTR/XTS/XCTR modes using ARMv8 Crypto Extensions"
depends on KERNEL_MODE_NEON
select CRYPTO_SKCIPHER
select CRYPTO_AES_ARM64_CE
config CRYPTO_AES_ARM64_NEON_BLK
tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
tristate "AES in ECB/CBC/CTR/XTS/XCTR modes using NEON instructions"
depends on KERNEL_MODE_NEON
select CRYPTO_SKCIPHER
select CRYPTO_LIB_AES

View File

@ -32,6 +32,9 @@ sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o
obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
obj-$(CONFIG_CRYPTO_POLYVAL_ARM64_CE) += polyval-ce.o
polyval-ce-y := polyval-ce-glue.o polyval-ce-core.o
obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM64_CE) += crct10dif-ce.o
crct10dif-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o

View File

@ -34,10 +34,11 @@
#define aes_essiv_cbc_encrypt ce_aes_essiv_cbc_encrypt
#define aes_essiv_cbc_decrypt ce_aes_essiv_cbc_decrypt
#define aes_ctr_encrypt ce_aes_ctr_encrypt
#define aes_xctr_encrypt ce_aes_xctr_encrypt
#define aes_xts_encrypt ce_aes_xts_encrypt
#define aes_xts_decrypt ce_aes_xts_decrypt
#define aes_mac_update ce_aes_mac_update
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS/XCTR using ARMv8 Crypto Extensions");
#else
#define MODE "neon"
#define PRIO 200
@ -50,16 +51,18 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
#define aes_essiv_cbc_encrypt neon_aes_essiv_cbc_encrypt
#define aes_essiv_cbc_decrypt neon_aes_essiv_cbc_decrypt
#define aes_ctr_encrypt neon_aes_ctr_encrypt
#define aes_xctr_encrypt neon_aes_xctr_encrypt
#define aes_xts_encrypt neon_aes_xts_encrypt
#define aes_xts_decrypt neon_aes_xts_decrypt
#define aes_mac_update neon_aes_mac_update
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS/XCTR using ARMv8 NEON");
#endif
#if defined(USE_V8_CRYPTO_EXTENSIONS) || !IS_ENABLED(CONFIG_CRYPTO_AES_ARM64_BS)
MODULE_ALIAS_CRYPTO("ecb(aes)");
MODULE_ALIAS_CRYPTO("cbc(aes)");
MODULE_ALIAS_CRYPTO("ctr(aes)");
MODULE_ALIAS_CRYPTO("xts(aes)");
MODULE_ALIAS_CRYPTO("xctr(aes)");
#endif
MODULE_ALIAS_CRYPTO("cts(cbc(aes))");
MODULE_ALIAS_CRYPTO("essiv(cbc(aes),sha256)");
@ -89,6 +92,9 @@ asmlinkage void aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int bytes, u8 ctr[]);
asmlinkage void aes_xctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int bytes, u8 ctr[], int byte_ctr);
asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
int rounds, int bytes, u32 const rk2[], u8 iv[],
int first);
@ -442,6 +448,52 @@ static int __maybe_unused essiv_cbc_decrypt(struct skcipher_request *req)
return err ?: cbc_decrypt_walk(req, &walk);
}
static int __maybe_unused xctr_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
int err, rounds = 6 + ctx->key_length / 4;
struct skcipher_walk walk;
unsigned int byte_ctr = 0;
err = skcipher_walk_virt(&walk, req, false);
while (walk.nbytes > 0) {
const u8 *src = walk.src.virt.addr;
unsigned int nbytes = walk.nbytes;
u8 *dst = walk.dst.virt.addr;
u8 buf[AES_BLOCK_SIZE];
/*
* If given less than 16 bytes, we must copy the partial block
* into a temporary buffer of 16 bytes to avoid out of bounds
* reads and writes. Furthermore, this code is somewhat unusual
* in that it expects the end of the data to be at the end of
* the temporary buffer, rather than the start of the data at
* the start of the temporary buffer.
*/
if (unlikely(nbytes < AES_BLOCK_SIZE))
src = dst = memcpy(buf + sizeof(buf) - nbytes,
src, nbytes);
else if (nbytes < walk.total)
nbytes &= ~(AES_BLOCK_SIZE - 1);
kernel_neon_begin();
aes_xctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
walk.iv, byte_ctr);
kernel_neon_end();
if (unlikely(nbytes < AES_BLOCK_SIZE))
memcpy(walk.dst.virt.addr,
buf + sizeof(buf) - nbytes, nbytes);
byte_ctr += nbytes;
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
return err;
}
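As an aside (not part of this patch), the "xctr(aes)" algorithm instantiated by this glue code is reachable through the generic skcipher API. A minimal sketch of a synchronous caller, with illustrative key/IV/buffer names:

/* Minimal sketch (not from this patch): one-shot in-place XCTR encryption. */
#include <crypto/aes.h>
#include <crypto/skcipher.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>

static int example_xctr_encrypt(const u8 *key, unsigned int keylen,
				u8 iv[AES_BLOCK_SIZE], void *buf,
				unsigned int len)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	tfm = crypto_alloc_skcipher("xctr(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, keylen);
	if (err)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	sg_init_one(&sg, buf, len);
	skcipher_request_set_callback(req, 0, crypto_req_done, &wait);
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}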
static int __maybe_unused ctr_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@ -457,6 +509,14 @@ static int __maybe_unused ctr_encrypt(struct skcipher_request *req)
u8 *dst = walk.dst.virt.addr;
u8 buf[AES_BLOCK_SIZE];
/*
* If given less than 16 bytes, we must copy the partial block
* into a temporary buffer of 16 bytes to avoid out of bounds
* reads and writes. Furthermore, this code is somewhat unusual
* in that it expects the end of the data to be at the end of
* the temporary buffer, rather than the start of the data at
* the start of the temporary buffer.
*/
if (unlikely(nbytes < AES_BLOCK_SIZE))
src = dst = memcpy(buf + sizeof(buf) - nbytes,
src, nbytes);
@ -669,6 +729,22 @@ static struct skcipher_alg aes_algs[] = { {
.setkey = skcipher_aes_setkey,
.encrypt = ctr_encrypt,
.decrypt = ctr_encrypt,
}, {
.base = {
.cra_name = "xctr(aes)",
.cra_driver_name = "xctr-aes-" MODE,
.cra_priority = PRIO,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.chunksize = AES_BLOCK_SIZE,
.setkey = skcipher_aes_setkey,
.encrypt = xctr_encrypt,
.decrypt = xctr_encrypt,
}, {
.base = {
.cra_name = "xts(aes)",

View File

@ -318,127 +318,211 @@ AES_FUNC_END(aes_cbc_cts_decrypt)
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.previous
/*
* aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int bytes, u8 ctr[])
* This macro generates the code for CTR and XCTR mode.
*/
.macro ctr_encrypt xctr
// Arguments
OUT .req x0
IN .req x1
KEY .req x2
ROUNDS_W .req w3
BYTES_W .req w4
IV .req x5
BYTE_CTR_W .req w6 // XCTR only
// Intermediate values
CTR_W .req w11 // XCTR only
CTR .req x11 // XCTR only
IV_PART .req x12
BLOCKS .req x13
BLOCKS_W .req w13
AES_FUNC_START(aes_ctr_encrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
enc_prepare w3, x2, x12
ld1 {vctr.16b}, [x5]
enc_prepare ROUNDS_W, KEY, IV_PART
ld1 {vctr.16b}, [IV]
umov x12, vctr.d[1] /* keep swabbed ctr in reg */
rev x12, x12
/*
* Keep 64 bits of the IV in a register. For CTR mode this lets us
* easily increment the IV. For XCTR mode this lets us efficiently XOR
* the 64-bit counter with the IV.
*/
.if \xctr
umov IV_PART, vctr.d[0]
lsr CTR_W, BYTE_CTR_W, #4
.else
umov IV_PART, vctr.d[1]
rev IV_PART, IV_PART
.endif
.LctrloopNx:
add w7, w4, #15
sub w4, w4, #MAX_STRIDE << 4
lsr w7, w7, #4
.LctrloopNx\xctr:
add BLOCKS_W, BYTES_W, #15
sub BYTES_W, BYTES_W, #MAX_STRIDE << 4
lsr BLOCKS_W, BLOCKS_W, #4
mov w8, #MAX_STRIDE
cmp w7, w8
csel w7, w7, w8, lt
adds x12, x12, x7
cmp BLOCKS_W, w8
csel BLOCKS_W, BLOCKS_W, w8, lt
/*
* Set up the counter values in v0-v{MAX_STRIDE-1}.
*
* If we are encrypting less than MAX_STRIDE blocks, the tail block
* handling code expects the last keystream block to be in
* v{MAX_STRIDE-1}. For example: if encrypting two blocks with
* MAX_STRIDE=5, then v3 and v4 should have the next two counter blocks.
*/
.if \xctr
add CTR, CTR, BLOCKS
.else
adds IV_PART, IV_PART, BLOCKS
.endif
mov v0.16b, vctr.16b
mov v1.16b, vctr.16b
mov v2.16b, vctr.16b
mov v3.16b, vctr.16b
ST5( mov v4.16b, vctr.16b )
bcs 0f
.if \xctr
sub x6, CTR, #MAX_STRIDE - 1
sub x7, CTR, #MAX_STRIDE - 2
sub x8, CTR, #MAX_STRIDE - 3
sub x9, CTR, #MAX_STRIDE - 4
ST5( sub x10, CTR, #MAX_STRIDE - 5 )
eor x6, x6, IV_PART
eor x7, x7, IV_PART
eor x8, x8, IV_PART
eor x9, x9, IV_PART
ST5( eor x10, x10, IV_PART )
mov v0.d[0], x6
mov v1.d[0], x7
mov v2.d[0], x8
mov v3.d[0], x9
ST5( mov v4.d[0], x10 )
.else
bcs 0f
.subsection 1
/*
* This subsection handles carries.
*
* Conditional branching here is allowed with respect to time
* invariance since the branches are dependent on the IV instead
* of the plaintext or key. This code is rarely executed in
* practice anyway.
*/
.subsection 1
/* apply carry to outgoing counter */
0: umov x8, vctr.d[0]
rev x8, x8
add x8, x8, #1
rev x8, x8
ins vctr.d[0], x8
/* Apply carry to outgoing counter. */
0: umov x8, vctr.d[0]
rev x8, x8
add x8, x8, #1
rev x8, x8
ins vctr.d[0], x8
/* apply carry to N counter blocks for N := x12 */
cbz x12, 2f
adr x16, 1f
sub x16, x16, x12, lsl #3
br x16
bti c
mov v0.d[0], vctr.d[0]
bti c
mov v1.d[0], vctr.d[0]
bti c
mov v2.d[0], vctr.d[0]
bti c
mov v3.d[0], vctr.d[0]
ST5( bti c )
ST5( mov v4.d[0], vctr.d[0] )
1: b 2f
.previous
/*
* Apply carry to counter blocks if needed.
*
* Since the carry flag was set, we know 0 <= IV_PART <
* MAX_STRIDE. Using the value of IV_PART we can determine how
* many counter blocks need to be updated.
*/
cbz IV_PART, 2f
adr x16, 1f
sub x16, x16, IV_PART, lsl #3
br x16
bti c
mov v0.d[0], vctr.d[0]
bti c
mov v1.d[0], vctr.d[0]
bti c
mov v2.d[0], vctr.d[0]
bti c
mov v3.d[0], vctr.d[0]
ST5( bti c )
ST5( mov v4.d[0], vctr.d[0] )
1: b 2f
.previous
2: rev x7, x12
ins vctr.d[1], x7
sub x7, x12, #MAX_STRIDE - 1
sub x8, x12, #MAX_STRIDE - 2
sub x9, x12, #MAX_STRIDE - 3
rev x7, x7
rev x8, x8
mov v1.d[1], x7
rev x9, x9
ST5( sub x10, x12, #MAX_STRIDE - 4 )
mov v2.d[1], x8
ST5( rev x10, x10 )
mov v3.d[1], x9
ST5( mov v4.d[1], x10 )
tbnz w4, #31, .Lctrtail
ld1 {v5.16b-v7.16b}, [x1], #48
2: rev x7, IV_PART
ins vctr.d[1], x7
sub x7, IV_PART, #MAX_STRIDE - 1
sub x8, IV_PART, #MAX_STRIDE - 2
sub x9, IV_PART, #MAX_STRIDE - 3
rev x7, x7
rev x8, x8
mov v1.d[1], x7
rev x9, x9
ST5( sub x10, IV_PART, #MAX_STRIDE - 4 )
mov v2.d[1], x8
ST5( rev x10, x10 )
mov v3.d[1], x9
ST5( mov v4.d[1], x10 )
.endif
/*
* If there are at least MAX_STRIDE blocks left, XOR the data with
* keystream and store. Otherwise jump to tail handling.
*/
tbnz BYTES_W, #31, .Lctrtail\xctr
ld1 {v5.16b-v7.16b}, [IN], #48
ST4( bl aes_encrypt_block4x )
ST5( bl aes_encrypt_block5x )
eor v0.16b, v5.16b, v0.16b
ST4( ld1 {v5.16b}, [x1], #16 )
ST4( ld1 {v5.16b}, [IN], #16 )
eor v1.16b, v6.16b, v1.16b
ST5( ld1 {v5.16b-v6.16b}, [x1], #32 )
ST5( ld1 {v5.16b-v6.16b}, [IN], #32 )
eor v2.16b, v7.16b, v2.16b
eor v3.16b, v5.16b, v3.16b
ST5( eor v4.16b, v6.16b, v4.16b )
st1 {v0.16b-v3.16b}, [x0], #64
ST5( st1 {v4.16b}, [x0], #16 )
cbz w4, .Lctrout
b .LctrloopNx
st1 {v0.16b-v3.16b}, [OUT], #64
ST5( st1 {v4.16b}, [OUT], #16 )
cbz BYTES_W, .Lctrout\xctr
b .LctrloopNx\xctr
.Lctrout:
st1 {vctr.16b}, [x5] /* return next CTR value */
.Lctrout\xctr:
.if !\xctr
st1 {vctr.16b}, [IV] /* return next CTR value */
.endif
ldp x29, x30, [sp], #16
ret
.Lctrtail:
/* XOR up to MAX_STRIDE * 16 - 1 bytes of in/output with v0 ... v3/v4 */
.Lctrtail\xctr:
/*
* Handle up to MAX_STRIDE * 16 - 1 bytes of plaintext
*
* This code expects the last keystream block to be in v{MAX_STRIDE-1}.
* For example: if encrypting two blocks with MAX_STRIDE=5, then v3 and
* v4 should have the next two counter blocks.
*
* This allows us to store the ciphertext by writing to overlapping
* regions of memory. Any invalid ciphertext blocks get overwritten by
* correctly computed blocks. This approach greatly simplifies the
* logic for storing the ciphertext.
*/
mov x16, #16
ands x6, x4, #0xf
csel x13, x6, x16, ne
ands w7, BYTES_W, #0xf
csel x13, x7, x16, ne
ST5( cmp w4, #64 - (MAX_STRIDE << 4) )
ST5( cmp BYTES_W, #64 - (MAX_STRIDE << 4))
ST5( csel x14, x16, xzr, gt )
cmp w4, #48 - (MAX_STRIDE << 4)
cmp BYTES_W, #48 - (MAX_STRIDE << 4)
csel x15, x16, xzr, gt
cmp w4, #32 - (MAX_STRIDE << 4)
cmp BYTES_W, #32 - (MAX_STRIDE << 4)
csel x16, x16, xzr, gt
cmp w4, #16 - (MAX_STRIDE << 4)
cmp BYTES_W, #16 - (MAX_STRIDE << 4)
adr_l x12, .Lcts_permute_table
add x12, x12, x13
ble .Lctrtail1x
adr_l x9, .Lcts_permute_table
add x9, x9, x13
ble .Lctrtail1x\xctr
ST5( ld1 {v5.16b}, [x1], x14 )
ld1 {v6.16b}, [x1], x15
ld1 {v7.16b}, [x1], x16
ST5( ld1 {v5.16b}, [IN], x14 )
ld1 {v6.16b}, [IN], x15
ld1 {v7.16b}, [IN], x16
ST4( bl aes_encrypt_block4x )
ST5( bl aes_encrypt_block5x )
ld1 {v8.16b}, [x1], x13
ld1 {v9.16b}, [x1]
ld1 {v10.16b}, [x12]
ld1 {v8.16b}, [IN], x13
ld1 {v9.16b}, [IN]
ld1 {v10.16b}, [x9]
ST4( eor v6.16b, v6.16b, v0.16b )
ST4( eor v7.16b, v7.16b, v1.16b )
@ -453,32 +537,91 @@ ST5( eor v7.16b, v7.16b, v2.16b )
ST5( eor v8.16b, v8.16b, v3.16b )
ST5( eor v9.16b, v9.16b, v4.16b )
ST5( st1 {v5.16b}, [x0], x14 )
st1 {v6.16b}, [x0], x15
st1 {v7.16b}, [x0], x16
add x13, x13, x0
ST5( st1 {v5.16b}, [OUT], x14 )
st1 {v6.16b}, [OUT], x15
st1 {v7.16b}, [OUT], x16
add x13, x13, OUT
st1 {v9.16b}, [x13] // overlapping stores
st1 {v8.16b}, [x0]
b .Lctrout
st1 {v8.16b}, [OUT]
b .Lctrout\xctr
.Lctrtail1x:
sub x7, x6, #16
csel x6, x6, x7, eq
add x1, x1, x6
add x0, x0, x6
ld1 {v5.16b}, [x1]
ld1 {v6.16b}, [x0]
.Lctrtail1x\xctr:
/*
* Handle <= 16 bytes of plaintext
*
* This code always reads and writes 16 bytes. To avoid out of bounds
* accesses, XCTR and CTR modes must use a temporary buffer when
* encrypting/decrypting less than 16 bytes.
*
* This code is unusual in that it loads the input and stores the output
* relative to the end of the buffers rather than relative to the start.
* This causes unusual behaviour when encrypting/decrypting less than 16
* bytes; the end of the data is expected to be at the end of the
* temporary buffer rather than the start of the data being at the start
* of the temporary buffer.
*/
sub x8, x7, #16
csel x7, x7, x8, eq
add IN, IN, x7
add OUT, OUT, x7
ld1 {v5.16b}, [IN]
ld1 {v6.16b}, [OUT]
ST5( mov v3.16b, v4.16b )
encrypt_block v3, w3, x2, x8, w7
ld1 {v10.16b-v11.16b}, [x12]
encrypt_block v3, ROUNDS_W, KEY, x8, w7
ld1 {v10.16b-v11.16b}, [x9]
tbl v3.16b, {v3.16b}, v10.16b
sshr v11.16b, v11.16b, #7
eor v5.16b, v5.16b, v3.16b
bif v5.16b, v6.16b, v11.16b
st1 {v5.16b}, [x0]
b .Lctrout
st1 {v5.16b}, [OUT]
b .Lctrout\xctr
// Arguments
.unreq OUT
.unreq IN
.unreq KEY
.unreq ROUNDS_W
.unreq BYTES_W
.unreq IV
.unreq BYTE_CTR_W // XCTR only
// Intermediate values
.unreq CTR_W // XCTR only
.unreq CTR // XCTR only
.unreq IV_PART
.unreq BLOCKS
.unreq BLOCKS_W
.endm
/*
* aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int bytes, u8 ctr[])
*
* The input and output buffers must always be at least 16 bytes even if
* encrypting/decrypting less than 16 bytes. Otherwise out of bounds
* accesses will occur. The data to be encrypted/decrypted is expected
* to be at the end of this 16-byte temporary buffer rather than the
* start.
*/
AES_FUNC_START(aes_ctr_encrypt)
ctr_encrypt 0
AES_FUNC_END(aes_ctr_encrypt)
/*
* aes_xctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int bytes, u8 const iv[], int byte_ctr)
*
* The input and output buffers must always be at least 16 bytes even if
* encrypting/decrypting less than 16 bytes. Otherwise out of bounds
* accesses will occur. The data to be encrypted/decrypted is expected
* to be at the end of this 16-byte temporary buffer rather than the
* start.
*/
AES_FUNC_START(aes_xctr_encrypt)
ctr_encrypt 1
AES_FUNC_END(aes_xctr_encrypt)
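For reference (an assumption based on the HCTR2 paper's definition of XCTR, not code from this patch), the keystream these routines produce for block number i (counting from 1) is E_K(IV xor LE(i)). A self-contained C sketch, using a hypothetical aes_encrypt_block() primitive and assuming block numbers fit in 64 bits:

/* Hypothetical reference sketch of one XCTR keystream block (not kernel code). */
#include <stdint.h>
#include <string.h>

/* Assumed single-block AES primitive: out = E_K(in), 16-byte blocks. */
void aes_encrypt_block(const void *key, uint8_t out[16], const uint8_t in[16]);

static void xctr_keystream_block(const void *key, const uint8_t iv[16],
				 uint64_t block_num /* starts at 1 */,
				 uint8_t out[16])
{
	uint8_t blk[16];
	int i;

	memcpy(blk, iv, 16);
	/* XOR the little-endian block number into the low 8 bytes of the IV;
	 * the high 8 bytes stay untouched because block_num < 2^64. */
	for (i = 0; i < 8; i++)
		blk[i] ^= (uint8_t)(block_num >> (8 * i));
	aes_encrypt_block(key, out, blk);
}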
/*
* aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,

View File

@ -0,0 +1,361 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Implementation of POLYVAL using ARMv8 Crypto Extensions.
*
* Copyright 2021 Google LLC
*/
/*
* This is an efficient implementation of POLYVAL using ARMv8 Crypto Extensions
* It works on 8 blocks at a time, by precomputing the first 8 keys powers h^8,
* ..., h^1 in the POLYVAL finite field. This precomputation allows us to split
* finite field multiplication into two steps.
*
* In the first step, we consider h^i, m_i as normal polynomials of degree less
* than 128. We then compute p(x) = h^8m_0 + ... + h^1m_7 where multiplication
* is simply polynomial multiplication.
*
* In the second step, we compute the reduction of p(x) modulo the finite field
* modulus g(x) = x^128 + x^127 + x^126 + x^121 + 1.
*
* This two step process is equivalent to computing h^8m_0 + ... + h^1m_7 where
* multiplication is finite field multiplication. The advantage is that the
* two-step process only requires 1 finite field reduction for every 8
* polynomial multiplications. Further parallelism is gained by interleaving the
* multiplications and polynomial reductions.
*/
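Restating the comment above as a single (hedged) equation in terms of the running accumulator S that the glue code passes in: each 8-block stride performs

\[ S \leftarrow \bigl( (S \oplus m_0)\,h^{8} \oplus m_1\,h^{7} \oplus \cdots \oplus m_7\,h \bigr) \bmod g(x), \qquad g(x) = x^{128} + x^{127} + x^{126} + x^{121} + 1, \]

with the eight polynomial multiplications done first and the single field reduction interleaved into the next stride (see the full_stride macro below).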
#include <linux/linkage.h>
#define STRIDE_BLOCKS 8
KEY_POWERS .req x0
MSG .req x1
BLOCKS_LEFT .req x2
ACCUMULATOR .req x3
KEY_START .req x10
EXTRA_BYTES .req x11
TMP .req x13
M0 .req v0
M1 .req v1
M2 .req v2
M3 .req v3
M4 .req v4
M5 .req v5
M6 .req v6
M7 .req v7
KEY8 .req v8
KEY7 .req v9
KEY6 .req v10
KEY5 .req v11
KEY4 .req v12
KEY3 .req v13
KEY2 .req v14
KEY1 .req v15
PL .req v16
PH .req v17
TMP_V .req v18
LO .req v20
MI .req v21
HI .req v22
SUM .req v23
GSTAR .req v24
.text
.arch armv8-a+crypto
.align 4
.Lgstar:
.quad 0xc200000000000000, 0xc200000000000000
/*
* Computes the product of two 128-bit polynomials in X and Y and XORs the
* components of the 256-bit product into LO, MI, HI.
*
* Given:
* X = [X_1 : X_0]
* Y = [Y_1 : Y_0]
*
* We compute:
* LO += X_0 * Y_0
* MI += (X_0 + X_1) * (Y_0 + Y_1)
* HI += X_1 * Y_1
*
* Later, the 256-bit result can be extracted as:
* [HI_1 : HI_0 + HI_1 + MI_1 + LO_1 : LO_1 + HI_0 + MI_0 + LO_0 : LO_0]
* This step is done when computing the polynomial reduction for efficiency
* reasons.
*
* Karatsuba multiplication is used instead of Schoolbook multiplication because
* it was found to be slightly faster on ARM64 CPUs.
*
*/
.macro karatsuba1 X Y
X .req \X
Y .req \Y
ext v25.16b, X.16b, X.16b, #8
ext v26.16b, Y.16b, Y.16b, #8
eor v25.16b, v25.16b, X.16b
eor v26.16b, v26.16b, Y.16b
pmull2 v28.1q, X.2d, Y.2d
pmull v29.1q, X.1d, Y.1d
pmull v27.1q, v25.1d, v26.1d
eor HI.16b, HI.16b, v28.16b
eor LO.16b, LO.16b, v29.16b
eor MI.16b, MI.16b, v27.16b
.unreq X
.unreq Y
.endm
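As a quick check of the extraction formula quoted in the comment above (a restatement, not from the source): all additions here are XOR in GF(2), so the accumulated middle product satisfies

\[ MI \oplus LO \oplus HI = (X_0 \oplus X_1)(Y_0 \oplus Y_1) \oplus X_0 Y_0 \oplus X_1 Y_1 = X_0 Y_1 \oplus X_1 Y_0, \]

and therefore

\[ X \cdot Y = HI\,x^{128} \oplus (MI \oplus LO \oplus HI)\,x^{64} \oplus LO, \]

which, split into 64-bit limbs, is exactly the layout [HI_1 : HI_0 + HI_1 + MI_1 + LO_1 : LO_1 + HI_0 + MI_0 + LO_0 : LO_0] given above.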
/*
* Same as karatsuba1, except overwrites HI, LO, MI rather than XORing into
* them.
*/
.macro karatsuba1_store X Y
X .req \X
Y .req \Y
ext v25.16b, X.16b, X.16b, #8
ext v26.16b, Y.16b, Y.16b, #8
eor v25.16b, v25.16b, X.16b
eor v26.16b, v26.16b, Y.16b
pmull2 HI.1q, X.2d, Y.2d
pmull LO.1q, X.1d, Y.1d
pmull MI.1q, v25.1d, v26.1d
.unreq X
.unreq Y
.endm
/*
* Computes the 256-bit polynomial represented by LO, HI, MI. Stores
* the result in PL, PH.
* [PH : PL] =
* [HI_1 : HI_1 + HI_0 + MI_1 + LO_1 : HI_0 + MI_0 + LO_1 + LO_0 : LO_0]
*/
.macro karatsuba2
// v4 = [HI_1 + MI_1 : HI_0 + MI_0]
eor v4.16b, HI.16b, MI.16b
// v4 = [HI_1 + MI_1 + LO_1 : HI_0 + MI_0 + LO_0]
eor v4.16b, v4.16b, LO.16b
// v5 = [HI_0 : LO_1]
ext v5.16b, LO.16b, HI.16b, #8
// v4 = [HI_1 + HI_0 + MI_1 + LO_1 : HI_0 + MI_0 + LO_1 + LO_0]
eor v4.16b, v4.16b, v5.16b
// HI = [HI_0 : HI_1]
ext HI.16b, HI.16b, HI.16b, #8
// LO = [LO_0 : LO_1]
ext LO.16b, LO.16b, LO.16b, #8
// PH = [HI_1 : HI_1 + HI_0 + MI_1 + LO_1]
ext PH.16b, v4.16b, HI.16b, #8
// PL = [HI_0 + MI_0 + LO_1 + LO_0 : LO_0]
ext PL.16b, LO.16b, v4.16b, #8
.endm
/*
* Computes the 128-bit reduction of PH : PL. Stores the result in dest.
*
* This macro computes p(x) mod g(x) where p(x) is in montgomery form and g(x) =
* x^128 + x^127 + x^126 + x^121 + 1.
*
* We have a 256-bit polynomial PH : PL = P_3 : P_2 : P_1 : P_0 that is the
* product of two 128-bit polynomials in Montgomery form. We need to reduce it
* mod g(x). Also, since polynomials in Montgomery form have an "extra" factor
* of x^128, this product has two extra factors of x^128. To get it back into
* Montgomery form, we need to remove one of these factors by dividing by x^128.
*
* To accomplish both of these goals, we add multiples of g(x) that cancel out
* the low 128 bits P_1 : P_0, leaving just the high 128 bits. Since the low
* bits are zero, the polynomial division by x^128 can be done by right
* shifting.
*
* Since the only nonzero term in the low 64 bits of g(x) is the constant term,
* the multiple of g(x) needed to cancel out P_0 is P_0 * g(x). The CPU can
* only do 64x64 bit multiplications, so split P_0 * g(x) into x^128 * P_0 +
* x^64 * g*(x) * P_0 + P_0, where g*(x) is bits 64-127 of g(x). Adding this to
* the original polynomial gives P_3 : P_2 + P_0 + T_1 : P_1 + T_0 : 0, where T
* = T_1 : T_0 = g*(x) * P_0. Thus, bits 0-63 got "folded" into bits 64-191.
*
* Repeating this same process on the next 64 bits "folds" bits 64-127 into bits
* 128-255, giving the answer in bits 128-255. This time, we need to cancel P_1
* + T_0 in bits 64-127. The multiple of g(x) required is (P_1 + T_0) * g(x) *
* x^64. Adding this to our previous computation gives P_3 + P_1 + T_0 + V_1 :
* P_2 + P_0 + T_1 + V_0 : 0 : 0, where V = V_1 : V_0 = g*(x) * (P_1 + T_0).
*
* So our final computation is:
* T = T_1 : T_0 = g*(x) * P_0
* V = V_1 : V_0 = g*(x) * (P_1 + T_0)
* p(x) / x^{128} mod g(x) = P_3 + P_1 + T_0 + V_1 : P_2 + P_0 + T_1 + V_0
*
* The implementation below saves a XOR instruction by computing P_1 + T_0 : P_0
* + T_1 and XORing into dest, rather than separately XORing P_1 : P_0 and T_0 :
* T_1 into dest. This allows us to reuse P_1 + T_0 when computing V.
*/
.macro montgomery_reduction dest
DEST .req \dest
// TMP_V = T_1 : T_0 = P_0 * g*(x)
pmull TMP_V.1q, PL.1d, GSTAR.1d
// TMP_V = T_0 : T_1
ext TMP_V.16b, TMP_V.16b, TMP_V.16b, #8
// TMP_V = P_1 + T_0 : P_0 + T_1
eor TMP_V.16b, PL.16b, TMP_V.16b
// PH = P_3 + P_1 + T_0 : P_2 + P_0 + T_1
eor PH.16b, PH.16b, TMP_V.16b
// TMP_V = V_1 : V_0 = (P_1 + T_0) * g*(x)
pmull2 TMP_V.1q, TMP_V.2d, GSTAR.2d
eor DEST.16b, PH.16b, TMP_V.16b
.unreq DEST
.endm
/*
* Compute Polyval on 8 blocks.
*
* If reduce is set, also computes the montgomery reduction of the
* previous full_stride call and XORs with the first message block.
* (m_0 + REDUCE(PL, PH))h^8 + ... + m_7h^1.
* I.e., the first multiplication uses m_0 + REDUCE(PL, PH) instead of m_0.
*
* Sets PL, PH.
*/
.macro full_stride reduce
eor LO.16b, LO.16b, LO.16b
eor MI.16b, MI.16b, MI.16b
eor HI.16b, HI.16b, HI.16b
ld1 {M0.16b, M1.16b, M2.16b, M3.16b}, [MSG], #64
ld1 {M4.16b, M5.16b, M6.16b, M7.16b}, [MSG], #64
karatsuba1 M7 KEY1
.if \reduce
pmull TMP_V.1q, PL.1d, GSTAR.1d
.endif
karatsuba1 M6 KEY2
.if \reduce
ext TMP_V.16b, TMP_V.16b, TMP_V.16b, #8
.endif
karatsuba1 M5 KEY3
.if \reduce
eor TMP_V.16b, PL.16b, TMP_V.16b
.endif
karatsuba1 M4 KEY4
.if \reduce
eor PH.16b, PH.16b, TMP_V.16b
.endif
karatsuba1 M3 KEY5
.if \reduce
pmull2 TMP_V.1q, TMP_V.2d, GSTAR.2d
.endif
karatsuba1 M2 KEY6
.if \reduce
eor SUM.16b, PH.16b, TMP_V.16b
.endif
karatsuba1 M1 KEY7
eor M0.16b, M0.16b, SUM.16b
karatsuba1 M0 KEY8
karatsuba2
.endm
/*
* Handle any extra blocks after full_stride loop.
*/
.macro partial_stride
add KEY_POWERS, KEY_START, #(STRIDE_BLOCKS << 4)
sub KEY_POWERS, KEY_POWERS, BLOCKS_LEFT, lsl #4
ld1 {KEY1.16b}, [KEY_POWERS], #16
ld1 {TMP_V.16b}, [MSG], #16
eor SUM.16b, SUM.16b, TMP_V.16b
karatsuba1_store KEY1 SUM
sub BLOCKS_LEFT, BLOCKS_LEFT, #1
tst BLOCKS_LEFT, #4
beq .Lpartial4BlocksDone
ld1 {M0.16b, M1.16b, M2.16b, M3.16b}, [MSG], #64
ld1 {KEY8.16b, KEY7.16b, KEY6.16b, KEY5.16b}, [KEY_POWERS], #64
karatsuba1 M0 KEY8
karatsuba1 M1 KEY7
karatsuba1 M2 KEY6
karatsuba1 M3 KEY5
.Lpartial4BlocksDone:
tst BLOCKS_LEFT, #2
beq .Lpartial2BlocksDone
ld1 {M0.16b, M1.16b}, [MSG], #32
ld1 {KEY8.16b, KEY7.16b}, [KEY_POWERS], #32
karatsuba1 M0 KEY8
karatsuba1 M1 KEY7
.Lpartial2BlocksDone:
tst BLOCKS_LEFT, #1
beq .LpartialDone
ld1 {M0.16b}, [MSG], #16
ld1 {KEY8.16b}, [KEY_POWERS], #16
karatsuba1 M0 KEY8
.LpartialDone:
karatsuba2
montgomery_reduction SUM
.endm
/*
* Perform montgomery multiplication in GF(2^128) and store result in op1.
*
* Computes op1*op2*x^{-128} mod x^128 + x^127 + x^126 + x^121 + 1
* If op1, op2 are in montgomery form, this computes the montgomery
* form of op1*op2.
*
* void pmull_polyval_mul(u8 *op1, const u8 *op2);
*/
SYM_FUNC_START(pmull_polyval_mul)
adr TMP, .Lgstar
ld1 {GSTAR.2d}, [TMP]
ld1 {v0.16b}, [x0]
ld1 {v1.16b}, [x1]
karatsuba1_store v0 v1
karatsuba2
montgomery_reduction SUM
st1 {SUM.16b}, [x0]
ret
SYM_FUNC_END(pmull_polyval_mul)
/*
* Perform polynomial evaluation as specified by POLYVAL. This computes:
* h^n * accumulator + h^n * m_0 + ... + h^1 * m_{n-1}
* where n=nblocks, h is the hash key, and m_i are the message blocks.
*
* x0 - pointer to precomputed key powers h^8 ... h^1
* x1 - pointer to message blocks
* x2 - number of blocks to hash
* x3 - pointer to accumulator
*
* void pmull_polyval_update(const struct polyval_ctx *ctx, const u8 *in,
* size_t nblocks, u8 *accumulator);
*/
SYM_FUNC_START(pmull_polyval_update)
adr TMP, .Lgstar
mov KEY_START, KEY_POWERS
ld1 {GSTAR.2d}, [TMP]
ld1 {SUM.16b}, [ACCUMULATOR]
subs BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
blt .LstrideLoopExit
ld1 {KEY8.16b, KEY7.16b, KEY6.16b, KEY5.16b}, [KEY_POWERS], #64
ld1 {KEY4.16b, KEY3.16b, KEY2.16b, KEY1.16b}, [KEY_POWERS], #64
full_stride 0
subs BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
blt .LstrideLoopExitReduce
.LstrideLoop:
full_stride 1
subs BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
bge .LstrideLoop
.LstrideLoopExitReduce:
montgomery_reduction SUM
.LstrideLoopExit:
adds BLOCKS_LEFT, BLOCKS_LEFT, #STRIDE_BLOCKS
beq .LskipPartial
partial_stride
.LskipPartial:
st1 {SUM.16b}, [ACCUMULATOR]
ret
SYM_FUNC_END(pmull_polyval_update)

View File

@ -0,0 +1,191 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Glue code for POLYVAL using ARMv8 Crypto Extensions
*
* Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen <mh1@iki.fi>
* Copyright (c) 2009 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
* Copyright 2021 Google LLC
*/
/*
* Glue code based on ghash-clmulni-intel_glue.c.
*
* This implementation of POLYVAL uses montgomery multiplication accelerated by
* ARMv8 Crypto Extensions instructions to implement the finite field operations.
*/
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/polyval.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cpufeature.h>
#include <asm/neon.h>
#include <asm/simd.h>
#define NUM_KEY_POWERS 8
struct polyval_tfm_ctx {
/*
* These powers must be in the order h^8, ..., h^1.
*/
u8 key_powers[NUM_KEY_POWERS][POLYVAL_BLOCK_SIZE];
};
struct polyval_desc_ctx {
u8 buffer[POLYVAL_BLOCK_SIZE];
u32 bytes;
};
asmlinkage void pmull_polyval_update(const struct polyval_tfm_ctx *keys,
const u8 *in, size_t nblocks, u8 *accumulator);
asmlinkage void pmull_polyval_mul(u8 *op1, const u8 *op2);
static void internal_polyval_update(const struct polyval_tfm_ctx *keys,
const u8 *in, size_t nblocks, u8 *accumulator)
{
if (likely(crypto_simd_usable())) {
kernel_neon_begin();
pmull_polyval_update(keys, in, nblocks, accumulator);
kernel_neon_end();
} else {
polyval_update_non4k(keys->key_powers[NUM_KEY_POWERS-1], in,
nblocks, accumulator);
}
}
static void internal_polyval_mul(u8 *op1, const u8 *op2)
{
if (likely(crypto_simd_usable())) {
kernel_neon_begin();
pmull_polyval_mul(op1, op2);
kernel_neon_end();
} else {
polyval_mul_non4k(op1, op2);
}
}
static int polyval_arm64_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
struct polyval_tfm_ctx *tctx = crypto_shash_ctx(tfm);
int i;
if (keylen != POLYVAL_BLOCK_SIZE)
return -EINVAL;
memcpy(tctx->key_powers[NUM_KEY_POWERS-1], key, POLYVAL_BLOCK_SIZE);
for (i = NUM_KEY_POWERS-2; i >= 0; i--) {
memcpy(tctx->key_powers[i], key, POLYVAL_BLOCK_SIZE);
internal_polyval_mul(tctx->key_powers[i],
tctx->key_powers[i+1]);
}
return 0;
}
static int polyval_arm64_init(struct shash_desc *desc)
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
memset(dctx, 0, sizeof(*dctx));
return 0;
}
static int polyval_arm64_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
u8 *pos;
unsigned int nblocks;
unsigned int n;
if (dctx->bytes) {
n = min(srclen, dctx->bytes);
pos = dctx->buffer + POLYVAL_BLOCK_SIZE - dctx->bytes;
dctx->bytes -= n;
srclen -= n;
while (n--)
*pos++ ^= *src++;
if (!dctx->bytes)
internal_polyval_mul(dctx->buffer,
tctx->key_powers[NUM_KEY_POWERS-1]);
}
while (srclen >= POLYVAL_BLOCK_SIZE) {
/* allow rescheduling every 4K bytes */
nblocks = min(srclen, 4096U) / POLYVAL_BLOCK_SIZE;
internal_polyval_update(tctx, src, nblocks, dctx->buffer);
srclen -= nblocks * POLYVAL_BLOCK_SIZE;
src += nblocks * POLYVAL_BLOCK_SIZE;
}
if (srclen) {
dctx->bytes = POLYVAL_BLOCK_SIZE - srclen;
pos = dctx->buffer;
while (srclen--)
*pos++ ^= *src++;
}
return 0;
}
static int polyval_arm64_final(struct shash_desc *desc, u8 *dst)
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
if (dctx->bytes) {
internal_polyval_mul(dctx->buffer,
tctx->key_powers[NUM_KEY_POWERS-1]);
}
memcpy(dst, dctx->buffer, POLYVAL_BLOCK_SIZE);
return 0;
}
static struct shash_alg polyval_alg = {
.digestsize = POLYVAL_DIGEST_SIZE,
.init = polyval_arm64_init,
.update = polyval_arm64_update,
.final = polyval_arm64_final,
.setkey = polyval_arm64_setkey,
.descsize = sizeof(struct polyval_desc_ctx),
.base = {
.cra_name = "polyval",
.cra_driver_name = "polyval-ce",
.cra_priority = 200,
.cra_blocksize = POLYVAL_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct polyval_tfm_ctx),
.cra_module = THIS_MODULE,
},
};
static int __init polyval_ce_mod_init(void)
{
return crypto_register_shash(&polyval_alg);
}
static void __exit polyval_ce_mod_exit(void)
{
crypto_unregister_shash(&polyval_alg);
}
module_cpu_feature_match(PMULL, polyval_ce_mod_init)
module_exit(polyval_ce_mod_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("POLYVAL hash function accelerated by ARMv8 Crypto Extensions");
MODULE_ALIAS_CRYPTO("polyval");
MODULE_ALIAS_CRYPTO("polyval-ce");

View File

@ -61,14 +61,15 @@ sha256-ssse3-$(CONFIG_AS_SHA256_NI) += sha256_ni_asm.o
obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o
blake2s-x86_64-y := blake2s-shash.o
obj-$(if $(CONFIG_CRYPTO_BLAKE2S_X86),y) += libblake2s-x86_64.o
obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += libblake2s-x86_64.o
libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o
obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
obj-$(CONFIG_CRYPTO_POLYVAL_CLMUL_NI) += polyval-clmulni.o
polyval-clmulni-y := polyval-clmulni_asm.o polyval-clmulni_glue.o
obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
crc32c-intel-y := crc32c-intel_glue.o
crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o

View File

@ -23,6 +23,11 @@
#define VMOVDQ vmovdqu
/*
* Note: the "x" prefix in these aliases means "this is an xmm register". The
* alias prefixes have no relation to XCTR where the "X" prefix means "XOR
* counter".
*/
#define xdata0 %xmm0
#define xdata1 %xmm1
#define xdata2 %xmm2
@ -31,8 +36,10 @@
#define xdata5 %xmm5
#define xdata6 %xmm6
#define xdata7 %xmm7
#define xcounter %xmm8
#define xbyteswap %xmm9
#define xcounter %xmm8 // CTR mode only
#define xiv %xmm8 // XCTR mode only
#define xbyteswap %xmm9 // CTR mode only
#define xtmp %xmm9 // XCTR mode only
#define xkey0 %xmm10
#define xkey4 %xmm11
#define xkey8 %xmm12
@ -45,7 +52,7 @@
#define p_keys %rdx
#define p_out %rcx
#define num_bytes %r8
#define counter %r9 // XCTR mode only
#define tmp %r10
#define DDQ_DATA 0
#define XDATA 1
@ -102,7 +109,7 @@ ddq_add_8:
* do_aes num_in_par load_keys key_len
* This increments p_in, but not p_out
*/
.macro do_aes b, k, key_len
.macro do_aes b, k, key_len, xctr
.set by, \b
.set load_keys, \k
.set klen, \key_len
@ -111,29 +118,48 @@ ddq_add_8:
vmovdqa 0*16(p_keys), xkey0
.endif
vpshufb xbyteswap, xcounter, xdata0
.set i, 1
.rept (by - 1)
club XDATA, i
vpaddq (ddq_add_1 + 16 * (i - 1))(%rip), xcounter, var_xdata
vptest ddq_low_msk(%rip), var_xdata
jnz 1f
vpaddq ddq_high_add_1(%rip), var_xdata, var_xdata
vpaddq ddq_high_add_1(%rip), xcounter, xcounter
1:
vpshufb xbyteswap, var_xdata, var_xdata
.set i, (i +1)
.endr
.if \xctr
movq counter, xtmp
.set i, 0
.rept (by)
club XDATA, i
vpaddq (ddq_add_1 + 16 * i)(%rip), xtmp, var_xdata
.set i, (i +1)
.endr
.set i, 0
.rept (by)
club XDATA, i
vpxor xiv, var_xdata, var_xdata
.set i, (i +1)
.endr
.else
vpshufb xbyteswap, xcounter, xdata0
.set i, 1
.rept (by - 1)
club XDATA, i
vpaddq (ddq_add_1 + 16 * (i - 1))(%rip), xcounter, var_xdata
vptest ddq_low_msk(%rip), var_xdata
jnz 1f
vpaddq ddq_high_add_1(%rip), var_xdata, var_xdata
vpaddq ddq_high_add_1(%rip), xcounter, xcounter
1:
vpshufb xbyteswap, var_xdata, var_xdata
.set i, (i +1)
.endr
.endif
vmovdqa 1*16(p_keys), xkeyA
vpxor xkey0, xdata0, xdata0
vpaddq (ddq_add_1 + 16 * (by - 1))(%rip), xcounter, xcounter
vptest ddq_low_msk(%rip), xcounter
jnz 1f
vpaddq ddq_high_add_1(%rip), xcounter, xcounter
1:
.if \xctr
add $by, counter
.else
vpaddq (ddq_add_1 + 16 * (by - 1))(%rip), xcounter, xcounter
vptest ddq_low_msk(%rip), xcounter
jnz 1f
vpaddq ddq_high_add_1(%rip), xcounter, xcounter
1:
.endif
.set i, 1
.rept (by - 1)
@ -371,94 +397,99 @@ ddq_add_8:
.endr
.endm
.macro do_aes_load val, key_len
do_aes \val, 1, \key_len
.macro do_aes_load val, key_len, xctr
do_aes \val, 1, \key_len, \xctr
.endm
.macro do_aes_noload val, key_len
do_aes \val, 0, \key_len
.macro do_aes_noload val, key_len, xctr
do_aes \val, 0, \key_len, \xctr
.endm
/* main body of aes ctr load */
.macro do_aes_ctrmain key_len
.macro do_aes_ctrmain key_len, xctr
cmp $16, num_bytes
jb .Ldo_return2\key_len
jb .Ldo_return2\xctr\key_len
vmovdqa byteswap_const(%rip), xbyteswap
vmovdqu (p_iv), xcounter
vpshufb xbyteswap, xcounter, xcounter
.if \xctr
shr $4, counter
vmovdqu (p_iv), xiv
.else
vmovdqa byteswap_const(%rip), xbyteswap
vmovdqu (p_iv), xcounter
vpshufb xbyteswap, xcounter, xcounter
.endif
mov num_bytes, tmp
and $(7*16), tmp
jz .Lmult_of_8_blks\key_len
jz .Lmult_of_8_blks\xctr\key_len
/* 1 <= tmp <= 7 */
cmp $(4*16), tmp
jg .Lgt4\key_len
je .Leq4\key_len
jg .Lgt4\xctr\key_len
je .Leq4\xctr\key_len
.Llt4\key_len:
.Llt4\xctr\key_len:
cmp $(2*16), tmp
jg .Leq3\key_len
je .Leq2\key_len
jg .Leq3\xctr\key_len
je .Leq2\xctr\key_len
.Leq1\key_len:
do_aes_load 1, \key_len
.Leq1\xctr\key_len:
do_aes_load 1, \key_len, \xctr
add $(1*16), p_out
and $(~7*16), num_bytes
jz .Ldo_return2\key_len
jmp .Lmain_loop2\key_len
jz .Ldo_return2\xctr\key_len
jmp .Lmain_loop2\xctr\key_len
.Leq2\key_len:
do_aes_load 2, \key_len
.Leq2\xctr\key_len:
do_aes_load 2, \key_len, \xctr
add $(2*16), p_out
and $(~7*16), num_bytes
jz .Ldo_return2\key_len
jmp .Lmain_loop2\key_len
jz .Ldo_return2\xctr\key_len
jmp .Lmain_loop2\xctr\key_len
.Leq3\key_len:
do_aes_load 3, \key_len
.Leq3\xctr\key_len:
do_aes_load 3, \key_len, \xctr
add $(3*16), p_out
and $(~7*16), num_bytes
jz .Ldo_return2\key_len
jmp .Lmain_loop2\key_len
jz .Ldo_return2\xctr\key_len
jmp .Lmain_loop2\xctr\key_len
.Leq4\key_len:
do_aes_load 4, \key_len
.Leq4\xctr\key_len:
do_aes_load 4, \key_len, \xctr
add $(4*16), p_out
and $(~7*16), num_bytes
jz .Ldo_return2\key_len
jmp .Lmain_loop2\key_len
jz .Ldo_return2\xctr\key_len
jmp .Lmain_loop2\xctr\key_len
.Lgt4\key_len:
.Lgt4\xctr\key_len:
cmp $(6*16), tmp
jg .Leq7\key_len
je .Leq6\key_len
jg .Leq7\xctr\key_len
je .Leq6\xctr\key_len
.Leq5\key_len:
do_aes_load 5, \key_len
.Leq5\xctr\key_len:
do_aes_load 5, \key_len, \xctr
add $(5*16), p_out
and $(~7*16), num_bytes
jz .Ldo_return2\key_len
jmp .Lmain_loop2\key_len
jz .Ldo_return2\xctr\key_len
jmp .Lmain_loop2\xctr\key_len
.Leq6\key_len:
do_aes_load 6, \key_len
.Leq6\xctr\key_len:
do_aes_load 6, \key_len, \xctr
add $(6*16), p_out
and $(~7*16), num_bytes
jz .Ldo_return2\key_len
jmp .Lmain_loop2\key_len
jz .Ldo_return2\xctr\key_len
jmp .Lmain_loop2\xctr\key_len
.Leq7\key_len:
do_aes_load 7, \key_len
.Leq7\xctr\key_len:
do_aes_load 7, \key_len, \xctr
add $(7*16), p_out
and $(~7*16), num_bytes
jz .Ldo_return2\key_len
jmp .Lmain_loop2\key_len
jz .Ldo_return2\xctr\key_len
jmp .Lmain_loop2\xctr\key_len
.Lmult_of_8_blks\key_len:
.Lmult_of_8_blks\xctr\key_len:
.if (\key_len != KEY_128)
vmovdqa 0*16(p_keys), xkey0
vmovdqa 4*16(p_keys), xkey4
@ -471,17 +502,19 @@ ddq_add_8:
vmovdqa 9*16(p_keys), xkey12
.endif
.align 16
.Lmain_loop2\key_len:
.Lmain_loop2\xctr\key_len:
/* num_bytes is a multiple of 8 and >0 */
do_aes_noload 8, \key_len
do_aes_noload 8, \key_len, \xctr
add $(8*16), p_out
sub $(8*16), num_bytes
jne .Lmain_loop2\key_len
jne .Lmain_loop2\xctr\key_len
.Ldo_return2\key_len:
/* return updated IV */
vpshufb xbyteswap, xcounter, xcounter
vmovdqu xcounter, (p_iv)
.Ldo_return2\xctr\key_len:
.if !\xctr
/* return updated IV */
vpshufb xbyteswap, xcounter, xcounter
vmovdqu xcounter, (p_iv)
.endif
RET
.endm
@ -494,7 +527,7 @@ ddq_add_8:
*/
SYM_FUNC_START(aes_ctr_enc_128_avx_by8)
/* call the aes main loop */
do_aes_ctrmain KEY_128
do_aes_ctrmain KEY_128 0
SYM_FUNC_END(aes_ctr_enc_128_avx_by8)
@ -507,7 +540,7 @@ SYM_FUNC_END(aes_ctr_enc_128_avx_by8)
*/
SYM_FUNC_START(aes_ctr_enc_192_avx_by8)
/* call the aes main loop */
do_aes_ctrmain KEY_192
do_aes_ctrmain KEY_192 0
SYM_FUNC_END(aes_ctr_enc_192_avx_by8)
@ -520,6 +553,45 @@ SYM_FUNC_END(aes_ctr_enc_192_avx_by8)
*/
SYM_FUNC_START(aes_ctr_enc_256_avx_by8)
/* call the aes main loop */
do_aes_ctrmain KEY_256
do_aes_ctrmain KEY_256 0
SYM_FUNC_END(aes_ctr_enc_256_avx_by8)
/*
* routine to do AES128 XCTR enc/decrypt "by8"
* XMM registers are clobbered.
* Saving/restoring must be done at a higher level
* aes_xctr_enc_128_avx_by8(const u8 *in, const u8 *iv, const void *keys,
* u8* out, unsigned int num_bytes, unsigned int byte_ctr)
*/
SYM_FUNC_START(aes_xctr_enc_128_avx_by8)
/* call the aes main loop */
do_aes_ctrmain KEY_128 1
SYM_FUNC_END(aes_xctr_enc_128_avx_by8)
/*
* routine to do AES192 XCTR enc/decrypt "by8"
* XMM registers are clobbered.
* Saving/restoring must be done at a higher level
* aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv, const void *keys,
* u8* out, unsigned int num_bytes, unsigned int byte_ctr)
*/
SYM_FUNC_START(aes_xctr_enc_192_avx_by8)
/* call the aes main loop */
do_aes_ctrmain KEY_192 1
SYM_FUNC_END(aes_xctr_enc_192_avx_by8)
/*
* routine to do AES256 XCTR enc/decrypt "by8"
* XMM registers are clobbered.
* Saving/restoring must be done at a higher level
* aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv, const void *keys,
* u8* out, unsigned int num_bytes, unsigned int byte_ctr)
*/
SYM_FUNC_START(aes_xctr_enc_256_avx_by8)
/* call the aes main loop */
do_aes_ctrmain KEY_256 1
SYM_FUNC_END(aes_xctr_enc_256_avx_by8)

View File

@ -135,6 +135,20 @@ asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv,
void *keys, u8 *out, unsigned int num_bytes);
asmlinkage void aes_ctr_enc_256_avx_by8(const u8 *in, u8 *iv,
void *keys, u8 *out, unsigned int num_bytes);
asmlinkage void aes_xctr_enc_128_avx_by8(const u8 *in, const u8 *iv,
const void *keys, u8 *out, unsigned int num_bytes,
unsigned int byte_ctr);
asmlinkage void aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv,
const void *keys, u8 *out, unsigned int num_bytes,
unsigned int byte_ctr);
asmlinkage void aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv,
const void *keys, u8 *out, unsigned int num_bytes,
unsigned int byte_ctr);
/*
* asmlinkage void aesni_gcm_init_avx_gen2()
* gcm_data *my_ctx_data, context data
@ -527,6 +541,59 @@ static int ctr_crypt(struct skcipher_request *req)
return err;
}
static void aesni_xctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv,
unsigned int byte_ctr)
{
if (ctx->key_length == AES_KEYSIZE_128)
aes_xctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len,
byte_ctr);
else if (ctx->key_length == AES_KEYSIZE_192)
aes_xctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len,
byte_ctr);
else
aes_xctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len,
byte_ctr);
}
static int xctr_crypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
u8 keystream[AES_BLOCK_SIZE];
struct skcipher_walk walk;
unsigned int nbytes;
unsigned int byte_ctr = 0;
int err;
__le32 block[AES_BLOCK_SIZE / sizeof(__le32)];
err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) > 0) {
kernel_fpu_begin();
if (nbytes & AES_BLOCK_MASK)
aesni_xctr_enc_avx_tfm(ctx, walk.dst.virt.addr,
walk.src.virt.addr, nbytes & AES_BLOCK_MASK,
walk.iv, byte_ctr);
nbytes &= ~AES_BLOCK_MASK;
byte_ctr += walk.nbytes - nbytes;
if (walk.nbytes == walk.total && nbytes > 0) {
memcpy(block, walk.iv, AES_BLOCK_SIZE);
block[0] ^= cpu_to_le32(1 + byte_ctr / AES_BLOCK_SIZE);
aesni_enc(ctx, keystream, (u8 *)block);
crypto_xor_cpy(walk.dst.virt.addr + walk.nbytes -
nbytes, walk.src.virt.addr + walk.nbytes
- nbytes, keystream, nbytes);
byte_ctr += nbytes;
nbytes = 0;
}
kernel_fpu_end();
err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
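A short note on the partial-block path above (reasoning about the existing code, nothing new): XCTR numbers its keystream blocks from 1, so a tail that starts at byte offset byte_ctr belongs to block index 1 + byte_ctr / AES_BLOCK_SIZE. That index is XORed into the low 32 bits of the IV copy before the single aesni_enc() call, which assumes the block index fits in 32 bits:

\[ \text{keystream}_i = E_K\bigl(\mathrm{IV} \oplus \mathrm{LE}_{128}(i)\bigr), \qquad i_{\text{tail}} = 1 + \lfloor \mathit{byte\_ctr} / 16 \rfloor . \]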
static int
rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
{
@ -1050,6 +1117,33 @@ static struct skcipher_alg aesni_skciphers[] = {
static
struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)];
#ifdef CONFIG_X86_64
/*
* XCTR does not have a non-AVX implementation, so it must be enabled
* conditionally.
*/
static struct skcipher_alg aesni_xctr = {
.base = {
.cra_name = "__xctr(aes)",
.cra_driver_name = "__xctr-aes-aesni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = CRYPTO_AES_CTX_SIZE,
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.chunksize = AES_BLOCK_SIZE,
.setkey = aesni_skcipher_setkey,
.encrypt = xctr_crypt,
.decrypt = xctr_crypt,
};
static struct simd_skcipher_alg *aesni_simd_xctr;
#endif /* CONFIG_X86_64 */
#ifdef CONFIG_X86_64
static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key,
unsigned int key_len)
@ -1163,7 +1257,7 @@ static int __init aesni_init(void)
static_call_update(aesni_ctr_enc_tfm, aesni_ctr_enc_avx_tfm);
pr_info("AES CTR mode by8 optimization enabled\n");
}
#endif
#endif /* CONFIG_X86_64 */
err = crypto_register_alg(&aesni_cipher_alg);
if (err)
@ -1180,8 +1274,22 @@ static int __init aesni_init(void)
if (err)
goto unregister_skciphers;
#ifdef CONFIG_X86_64
if (boot_cpu_has(X86_FEATURE_AVX))
err = simd_register_skciphers_compat(&aesni_xctr, 1,
&aesni_simd_xctr);
if (err)
goto unregister_aeads;
#endif /* CONFIG_X86_64 */
return 0;
#ifdef CONFIG_X86_64
unregister_aeads:
simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads),
aesni_simd_aeads);
#endif /* CONFIG_X86_64 */
unregister_skciphers:
simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
aesni_simd_skciphers);
@ -1197,6 +1305,10 @@ static void __exit aesni_exit(void)
simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
aesni_simd_skciphers);
crypto_unregister_alg(&aesni_cipher_alg);
#ifdef CONFIG_X86_64
if (boot_cpu_has(X86_FEATURE_AVX))
simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr);
#endif /* CONFIG_X86_64 */
}
late_initcall(aesni_init);

View File

@ -4,7 +4,6 @@
*/
#include <crypto/internal/blake2s.h>
#include <crypto/internal/simd.h>
#include <linux/types.h>
#include <linux/jump_label.h>
@ -33,7 +32,7 @@ void blake2s_compress(struct blake2s_state *state, const u8 *block,
/* SIMD disables preemption, so relax after processing each page. */
BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);
if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
if (!static_branch_likely(&blake2s_use_ssse3) || !may_use_simd()) {
blake2s_compress_generic(state, block, nblocks, inc);
return;
}

View File

@ -1,77 +0,0 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include <crypto/internal/blake2s.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/hash.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sizes.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
static int crypto_blake2s_update_x86(struct shash_desc *desc,
const u8 *in, unsigned int inlen)
{
return crypto_blake2s_update(desc, in, inlen, false);
}
static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out)
{
return crypto_blake2s_final(desc, out, false);
}
#define BLAKE2S_ALG(name, driver_name, digest_size) \
{ \
.base.cra_name = name, \
.base.cra_driver_name = driver_name, \
.base.cra_priority = 200, \
.base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \
.base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \
.base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \
.base.cra_module = THIS_MODULE, \
.digestsize = digest_size, \
.setkey = crypto_blake2s_setkey, \
.init = crypto_blake2s_init, \
.update = crypto_blake2s_update_x86, \
.final = crypto_blake2s_final_x86, \
.descsize = sizeof(struct blake2s_state), \
}
static struct shash_alg blake2s_algs[] = {
BLAKE2S_ALG("blake2s-128", "blake2s-128-x86", BLAKE2S_128_HASH_SIZE),
BLAKE2S_ALG("blake2s-160", "blake2s-160-x86", BLAKE2S_160_HASH_SIZE),
BLAKE2S_ALG("blake2s-224", "blake2s-224-x86", BLAKE2S_224_HASH_SIZE),
BLAKE2S_ALG("blake2s-256", "blake2s-256-x86", BLAKE2S_256_HASH_SIZE),
};
static int __init blake2s_mod_init(void)
{
if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3))
return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
return 0;
}
static void __exit blake2s_mod_exit(void)
{
if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3))
crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
}
module_init(blake2s_mod_init);
module_exit(blake2s_mod_exit);
MODULE_ALIAS_CRYPTO("blake2s-128");
MODULE_ALIAS_CRYPTO("blake2s-128-x86");
MODULE_ALIAS_CRYPTO("blake2s-160");
MODULE_ALIAS_CRYPTO("blake2s-160-x86");
MODULE_ALIAS_CRYPTO("blake2s-224");
MODULE_ALIAS_CRYPTO("blake2s-224-x86");
MODULE_ALIAS_CRYPTO("blake2s-256");
MODULE_ALIAS_CRYPTO("blake2s-256-x86");
MODULE_LICENSE("GPL v2");

View File

@ -0,0 +1,321 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright 2021 Google LLC
*/
/*
* This is an efficient implementation of POLYVAL using intel PCLMULQDQ-NI
* instructions. It works on 8 blocks at a time, by precomputing the first 8
* keys powers h^8, ..., h^1 in the POLYVAL finite field. This precomputation
* allows us to split finite field multiplication into two steps.
*
* In the first step, we consider h^i, m_i as normal polynomials of degree less
* than 128. We then compute p(x) = h^8m_0 + ... + h^1m_7 where multiplication
* is simply polynomial multiplication.
*
* In the second step, we compute the reduction of p(x) modulo the finite field
* modulus g(x) = x^128 + x^127 + x^126 + x^121 + 1.
*
* This two step process is equivalent to computing h^8m_0 + ... + h^1m_7 where
* multiplication is finite field multiplication. The advantage is that the
* two-step process only requires 1 finite field reduction for every 8
* polynomial multiplications. Further parallelism is gained by interleaving the
* multiplications and polynomial reductions.
*/
#include <linux/linkage.h>
#include <asm/frame.h>
#define STRIDE_BLOCKS 8
#define GSTAR %xmm7
#define PL %xmm8
#define PH %xmm9
#define TMP_XMM %xmm11
#define LO %xmm12
#define HI %xmm13
#define MI %xmm14
#define SUM %xmm15
#define KEY_POWERS %rdi
#define MSG %rsi
#define BLOCKS_LEFT %rdx
#define ACCUMULATOR %rcx
#define TMP %rax
.section .rodata.cst16.gstar, "aM", @progbits, 16
.align 16
.Lgstar:
.quad 0xc200000000000000, 0xc200000000000000
.text
/*
* Performs schoolbook1_iteration on two lists of 128-bit polynomials of length
* count pointed to by MSG and KEY_POWERS.
*/
.macro schoolbook1 count
.set i, 0
.rept (\count)
schoolbook1_iteration i 0
.set i, (i +1)
.endr
.endm
/*
* Computes the product of two 128-bit polynomials at the memory locations
* specified by (MSG + 16*i) and (KEY_POWERS + 16*i) and XORs the components of
* the 256-bit product into LO, MI, HI.
*
* Given:
* X = [X_1 : X_0]
* Y = [Y_1 : Y_0]
*
* We compute:
* LO += X_0 * Y_0
* MI += X_0 * Y_1 + X_1 * Y_0
* HI += X_1 * Y_1
*
* Later, the 256-bit result can be extracted as:
* [HI_1 : HI_0 + MI_1 : LO_1 + MI_0 : LO_0]
* This step is done when computing the polynomial reduction for efficiency
* reasons.
*
* If xor_sum == 1, then also XOR the value of SUM into m_0. This avoids an
* extra multiplication of SUM and h^8.
*/
.macro schoolbook1_iteration i xor_sum
movups (16*\i)(MSG), %xmm0
.if (\i == 0 && \xor_sum == 1)
pxor SUM, %xmm0
.endif
vpclmulqdq $0x01, (16*\i)(KEY_POWERS), %xmm0, %xmm2
vpclmulqdq $0x00, (16*\i)(KEY_POWERS), %xmm0, %xmm1
vpclmulqdq $0x10, (16*\i)(KEY_POWERS), %xmm0, %xmm3
vpclmulqdq $0x11, (16*\i)(KEY_POWERS), %xmm0, %xmm4
vpxor %xmm2, MI, MI
vpxor %xmm1, LO, LO
vpxor %xmm4, HI, HI
vpxor %xmm3, MI, MI
.endm
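As a quick check of the extraction formula quoted in the comment above (a restatement, not from the source): with MI accumulating both cross products, the 256-bit schoolbook product is

\[ X \cdot Y = HI\,x^{128} \oplus MI\,x^{64} \oplus LO, \qquad MI = X_0 Y_1 \oplus X_1 Y_0, \]

which in 64-bit limbs is [HI_1 : HI_0 + MI_1 : LO_1 + MI_0 : LO_0], the layout that the schoolbook2 macro below assembles with shifts and XORs.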
/*
* Performs the same computation as schoolbook1_iteration, except we expect the
* arguments to already be loaded into xmm0 and xmm1 and we set the result
* registers LO, MI, and HI directly rather than XOR'ing into them.
*/
.macro schoolbook1_noload
vpclmulqdq $0x01, %xmm0, %xmm1, MI
vpclmulqdq $0x10, %xmm0, %xmm1, %xmm2
vpclmulqdq $0x00, %xmm0, %xmm1, LO
vpclmulqdq $0x11, %xmm0, %xmm1, HI
vpxor %xmm2, MI, MI
.endm
/*
* Computes the 256-bit polynomial represented by LO, HI, MI. Stores
* the result in PL, PH.
* [PH : PL] = [HI_1 : HI_0 + MI_1 : LO_1 + MI_0 : LO_0]
*/
.macro schoolbook2
vpslldq $8, MI, PL
vpsrldq $8, MI, PH
pxor LO, PL
pxor HI, PH
.endm
/*
* Computes the 128-bit reduction of PH : PL. Stores the result in dest.
*
* This macro computes p(x) mod g(x) where p(x) is in montgomery form and g(x) =
* x^128 + x^127 + x^126 + x^121 + 1.
*
* We have a 256-bit polynomial PH : PL = P_3 : P_2 : P_1 : P_0 that is the
* product of two 128-bit polynomials in Montgomery form. We need to reduce it
* mod g(x). Also, since polynomials in Montgomery form have an "extra" factor
* of x^128, this product has two extra factors of x^128. To get it back into
* Montgomery form, we need to remove one of these factors by dividing by x^128.
*
* To accomplish both of these goals, we add multiples of g(x) that cancel out
* the low 128 bits P_1 : P_0, leaving just the high 128 bits. Since the low
* bits are zero, the polynomial division by x^128 can be done by right shifting.
*
* Since the only nonzero term in the low 64 bits of g(x) is the constant term,
* the multiple of g(x) needed to cancel out P_0 is P_0 * g(x). The CPU can
* only do 64x64 bit multiplications, so split P_0 * g(x) into x^128 * P_0 +
* x^64 * g*(x) * P_0 + P_0, where g*(x) is bits 64-127 of g(x). Adding this to
* the original polynomial gives P_3 : P_2 + P_0 + T_1 : P_1 + T_0 : 0, where T
* = T_1 : T_0 = g*(x) * P_0. Thus, bits 0-63 got "folded" into bits 64-191.
*
* Repeating this same process on the next 64 bits "folds" bits 64-127 into bits
* 128-255, giving the answer in bits 128-255. This time, we need to cancel P_1
* + T_0 in bits 64-127. The multiple of g(x) required is (P_1 + T_0) * g(x) *
* x^64. Adding this to our previous computation gives P_3 + P_1 + T_0 + V_1 :
* P_2 + P_0 + T_1 + V_0 : 0 : 0, where V = V_1 : V_0 = g*(x) * (P_1 + T_0).
*
* So our final computation is:
* T = T_1 : T_0 = g*(x) * P_0
* V = V_1 : V_0 = g*(x) * (P_1 + T_0)
* p(x) / x^{128} mod g(x) = P_3 + P_1 + T_0 + V_1 : P_2 + P_0 + T_1 + V_0
*
* The implementation below saves a XOR instruction by computing P_1 + T_0 : P_0
* + T_1 and XORing into dest, rather than separately XORing P_1 : P_0 and T_0 :
* T_1 into dest. This allows us to reuse P_1 + T_0 when computing V.
*/
.macro montgomery_reduction dest
vpclmulqdq $0x00, PL, GSTAR, TMP_XMM # TMP_XMM = T_1 : T_0 = P_0 * g*(x)
pshufd $0b01001110, TMP_XMM, TMP_XMM # TMP_XMM = T_0 : T_1
pxor PL, TMP_XMM # TMP_XMM = P_1 + T_0 : P_0 + T_1
pxor TMP_XMM, PH # PH = P_3 + P_1 + T_0 : P_2 + P_0 + T_1
pclmulqdq $0x11, GSTAR, TMP_XMM # TMP_XMM = V_1 : V_0 = V = [(P_1 + T_0) * g*(x)]
vpxor TMP_XMM, PH, \dest
.endm
/*
* Compute schoolbook multiplication for 8 blocks
* m_0h^8 + ... + m_7h^1
*
* If reduce is set, also computes the montgomery reduction of the
* previous full_stride call and XORs with the first message block.
* (m_0 + REDUCE(PL, PH))h^8 + ... + m_7h^1.
* I.e., the first multiplication uses m_0 + REDUCE(PL, PH) instead of m_0.
*/
.macro full_stride reduce
pxor LO, LO
pxor HI, HI
pxor MI, MI
schoolbook1_iteration 7 0
.if \reduce
vpclmulqdq $0x00, PL, GSTAR, TMP_XMM
.endif
schoolbook1_iteration 6 0
.if \reduce
pshufd $0b01001110, TMP_XMM, TMP_XMM
.endif
schoolbook1_iteration 5 0
.if \reduce
pxor PL, TMP_XMM
.endif
schoolbook1_iteration 4 0
.if \reduce
pxor TMP_XMM, PH
.endif
schoolbook1_iteration 3 0
.if \reduce
pclmulqdq $0x11, GSTAR, TMP_XMM
.endif
schoolbook1_iteration 2 0
.if \reduce
vpxor TMP_XMM, PH, SUM
.endif
schoolbook1_iteration 1 0
schoolbook1_iteration 0 1
addq $(8*16), MSG
schoolbook2
.endm
/*
* Process BLOCKS_LEFT blocks, where 0 < BLOCKS_LEFT < STRIDE_BLOCKS
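* The key powers pointer is adjusted so that the first remaining block (which
* also absorbs SUM) is multiplied by h^BLOCKS_LEFT and the last one by h^1.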
*/
.macro partial_stride
mov BLOCKS_LEFT, TMP
shlq $4, TMP
addq $(16*STRIDE_BLOCKS), KEY_POWERS
subq TMP, KEY_POWERS
movups (MSG), %xmm0
pxor SUM, %xmm0
movaps (KEY_POWERS), %xmm1
schoolbook1_noload
dec BLOCKS_LEFT
addq $16, MSG
addq $16, KEY_POWERS
test $4, BLOCKS_LEFT
jz .Lpartial4BlocksDone
schoolbook1 4
addq $(4*16), MSG
addq $(4*16), KEY_POWERS
.Lpartial4BlocksDone:
test $2, BLOCKS_LEFT
jz .Lpartial2BlocksDone
schoolbook1 2
addq $(2*16), MSG
addq $(2*16), KEY_POWERS
.Lpartial2BlocksDone:
test $1, BLOCKS_LEFT
jz .LpartialDone
schoolbook1 1
.LpartialDone:
schoolbook2
montgomery_reduction SUM
.endm
/*
* Perform montgomery multiplication in GF(2^128) and store result in op1.
*
* Computes op1*op2*x^{-128} mod x^128 + x^127 + x^126 + x^121 + 1
* If op1, op2 are in montgomery form, this computes the montgomery
* form of op1*op2.
*
* void clmul_polyval_mul(u8 *op1, const u8 *op2);
*/
SYM_FUNC_START(clmul_polyval_mul)
FRAME_BEGIN
vmovdqa .Lgstar(%rip), GSTAR
movups (%rdi), %xmm0
movups (%rsi), %xmm1
schoolbook1_noload
schoolbook2
montgomery_reduction SUM
movups SUM, (%rdi)
FRAME_END
RET
SYM_FUNC_END(clmul_polyval_mul)
/*
* Perform polynomial evaluation as specified by POLYVAL. This computes:
* h^n * accumulator + h^n * m_0 + ... + h^1 * m_{n-1}
* where n=nblocks, h is the hash key, and m_i are the message blocks.
*
* rdi - pointer to precomputed key powers h^8 ... h^1
* rsi - pointer to message blocks
* rdx - number of blocks to hash
* rcx - pointer to the accumulator
*
* void clmul_polyval_update(const struct polyval_tfm_ctx *keys,
* const u8 *in, size_t nblocks, u8 *accumulator);
*/
SYM_FUNC_START(clmul_polyval_update)
FRAME_BEGIN
vmovdqa .Lgstar(%rip), GSTAR
movups (ACCUMULATOR), SUM
subq $STRIDE_BLOCKS, BLOCKS_LEFT
js .LstrideLoopExit
full_stride 0
subq $STRIDE_BLOCKS, BLOCKS_LEFT
js .LstrideLoopExitReduce
.LstrideLoop:
full_stride 1
subq $STRIDE_BLOCKS, BLOCKS_LEFT
jns .LstrideLoop
.LstrideLoopExitReduce:
montgomery_reduction SUM
.LstrideLoopExit:
add $STRIDE_BLOCKS, BLOCKS_LEFT
jz .LskipPartial
partial_stride
.LskipPartial:
movups SUM, (ACCUMULATOR)
FRAME_END
RET
SYM_FUNC_END(clmul_polyval_update)

View File

@ -0,0 +1,203 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Glue code for POLYVAL using PCLMULQDQ-NI
*
* Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen <mh1@iki.fi>
* Copyright (c) 2009 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
* Copyright 2021 Google LLC
*/
/*
* Glue code based on ghash-clmulni-intel_glue.c.
*
* This implementation of POLYVAL uses montgomery multiplication
* accelerated by PCLMULQDQ-NI to implement the finite field
* operations.
*/
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/polyval.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/cpu_device_id.h>
#include <asm/simd.h>
#define NUM_KEY_POWERS 8
struct polyval_tfm_ctx {
/*
* These powers must be in the order h^8, ..., h^1.
*/
u8 key_powers[NUM_KEY_POWERS][POLYVAL_BLOCK_SIZE];
};
struct polyval_desc_ctx {
u8 buffer[POLYVAL_BLOCK_SIZE];
u32 bytes;
};
asmlinkage void clmul_polyval_update(const struct polyval_tfm_ctx *keys,
const u8 *in, size_t nblocks, u8 *accumulator);
asmlinkage void clmul_polyval_mul(u8 *op1, const u8 *op2);
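/*
 * Use the PCLMULQDQ-NI implementation whenever SIMD registers are usable in
 * the current context; otherwise fall back to the generic non-4k helpers,
 * keyed by h^1 (the last entry of key_powers).
 */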
static void internal_polyval_update(const struct polyval_tfm_ctx *keys,
const u8 *in, size_t nblocks, u8 *accumulator)
{
if (likely(crypto_simd_usable())) {
kernel_fpu_begin();
clmul_polyval_update(keys, in, nblocks, accumulator);
kernel_fpu_end();
} else {
polyval_update_non4k(keys->key_powers[NUM_KEY_POWERS-1], in,
nblocks, accumulator);
}
}
static void internal_polyval_mul(u8 *op1, const u8 *op2)
{
if (likely(crypto_simd_usable())) {
kernel_fpu_begin();
clmul_polyval_mul(op1, op2);
kernel_fpu_end();
} else {
polyval_mul_non4k(op1, op2);
}
}
static int polyval_x86_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
struct polyval_tfm_ctx *tctx = crypto_shash_ctx(tfm);
int i;
if (keylen != POLYVAL_BLOCK_SIZE)
return -EINVAL;
memcpy(tctx->key_powers[NUM_KEY_POWERS-1], key, POLYVAL_BLOCK_SIZE);
for (i = NUM_KEY_POWERS-2; i >= 0; i--) {
memcpy(tctx->key_powers[i], key, POLYVAL_BLOCK_SIZE);
internal_polyval_mul(tctx->key_powers[i],
tctx->key_powers[i+1]);
}
return 0;
}
static int polyval_x86_init(struct shash_desc *desc)
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
memset(dctx, 0, sizeof(*dctx));
return 0;
}
static int polyval_x86_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
u8 *pos;
unsigned int nblocks;
unsigned int n;
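/*
 * dctx->bytes counts how many more bytes are needed to complete the block
 * currently buffered in dctx->buffer from a previous call.
 */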
if (dctx->bytes) {
n = min(srclen, dctx->bytes);
pos = dctx->buffer + POLYVAL_BLOCK_SIZE - dctx->bytes;
dctx->bytes -= n;
srclen -= n;
while (n--)
*pos++ ^= *src++;
if (!dctx->bytes)
internal_polyval_mul(dctx->buffer,
tctx->key_powers[NUM_KEY_POWERS-1]);
}
while (srclen >= POLYVAL_BLOCK_SIZE) {
/* Allow rescheduling every 4K bytes. */
nblocks = min(srclen, 4096U) / POLYVAL_BLOCK_SIZE;
internal_polyval_update(tctx, src, nblocks, dctx->buffer);
srclen -= nblocks * POLYVAL_BLOCK_SIZE;
src += nblocks * POLYVAL_BLOCK_SIZE;
}
if (srclen) {
dctx->bytes = POLYVAL_BLOCK_SIZE - srclen;
pos = dctx->buffer;
while (srclen--)
*pos++ ^= *src++;
}
return 0;
}
static int polyval_x86_final(struct shash_desc *desc, u8 *dst)
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
if (dctx->bytes) {
internal_polyval_mul(dctx->buffer,
tctx->key_powers[NUM_KEY_POWERS-1]);
}
memcpy(dst, dctx->buffer, POLYVAL_BLOCK_SIZE);
return 0;
}
static struct shash_alg polyval_alg = {
.digestsize = POLYVAL_DIGEST_SIZE,
.init = polyval_x86_init,
.update = polyval_x86_update,
.final = polyval_x86_final,
.setkey = polyval_x86_setkey,
.descsize = sizeof(struct polyval_desc_ctx),
.base = {
.cra_name = "polyval",
.cra_driver_name = "polyval-clmulni",
.cra_priority = 200,
.cra_blocksize = POLYVAL_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct polyval_tfm_ctx),
.cra_module = THIS_MODULE,
},
};
__maybe_unused static const struct x86_cpu_id pcmul_cpu_id[] = {
X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL),
{}
};
MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);
static int __init polyval_clmulni_mod_init(void)
{
if (!x86_match_cpu(pcmul_cpu_id))
return -ENODEV;
if (!boot_cpu_has(X86_FEATURE_AVX))
return -ENODEV;
return crypto_register_shash(&polyval_alg);
}
static void __exit polyval_clmulni_mod_exit(void)
{
crypto_unregister_shash(&polyval_alg);
}
module_init(polyval_clmulni_mod_init);
module_exit(polyval_clmulni_mod_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("POLYVAL hash function accelerated by PCLMULQDQ-NI");
MODULE_ALIAS_CRYPTO("polyval");
MODULE_ALIAS_CRYPTO("polyval-clmulni");

View File

@ -461,6 +461,15 @@ config CRYPTO_PCBC
PCBC: Propagating Cipher Block Chaining mode
This block cipher algorithm is required for RxRPC.
config CRYPTO_XCTR
tristate
select CRYPTO_SKCIPHER
select CRYPTO_MANAGER
help
XCTR: XOR Counter mode. This blockcipher mode is a variant of CTR mode in
which the block counter is combined with the IV by XOR, with the counter
encoded little-endian, instead of by big-endian integer addition.
XCTR mode is used to implement HCTR2.
config CRYPTO_XTS
tristate "XTS support"
select CRYPTO_SKCIPHER
@ -524,6 +533,17 @@ config CRYPTO_ADIANTUM
If unsure, say N.
config CRYPTO_HCTR2
tristate "HCTR2 support"
select CRYPTO_XCTR
select CRYPTO_POLYVAL
select CRYPTO_MANAGER
help
HCTR2 is a length-preserving encryption mode for storage encryption that
is efficient on processors with instructions to accelerate AES and
carryless multiplication, e.g. x86 processors with AES-NI and CLMUL, and
ARM processors with the ARMv8 crypto extensions.
config CRYPTO_ESSIV
tristate "ESSIV support for block encryption"
select CRYPTO_AUTHENC
@ -692,26 +712,8 @@ config CRYPTO_BLAKE2B
See https://blake2.net for further information.
config CRYPTO_BLAKE2S
tristate "BLAKE2s digest algorithm"
select CRYPTO_LIB_BLAKE2S_GENERIC
select CRYPTO_HASH
help
Implementation of cryptographic hash function BLAKE2s
optimized for 8-32bit platforms and can produce digests of any size
between 1 to 32. The keyed hash is also implemented.
This module provides the following algorithms:
- blake2s-128
- blake2s-160
- blake2s-224
- blake2s-256
See https://blake2.net for further information.
config CRYPTO_BLAKE2S_X86
tristate "BLAKE2s digest algorithm (x86 accelerated version)"
bool "BLAKE2s digest algorithm (x86 accelerated version)"
depends on X86 && 64BIT
select CRYPTO_LIB_BLAKE2S_GENERIC
select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
@ -765,6 +767,23 @@ config CRYPTO_GHASH
GHASH is the hash function used in GCM (Galois/Counter Mode).
It is not a general-purpose cryptographic hash function.
config CRYPTO_POLYVAL
tristate
select CRYPTO_GF128MUL
select CRYPTO_HASH
help
POLYVAL is the hash function used in HCTR2. It is not a general-purpose
cryptographic hash function.
config CRYPTO_POLYVAL_CLMUL_NI
tristate "POLYVAL hash function (CLMUL-NI accelerated)"
depends on X86 && 64BIT
select CRYPTO_POLYVAL
help
This is the x86_64 CLMUL-NI accelerated implementation of POLYVAL. It is
used to efficiently implement HCTR2 on x86-64 processors that support
carry-less multiplication instructions.
config CRYPTO_POLY1305
tristate "Poly1305 authenticator algorithm"
select CRYPTO_HASH
@ -1142,7 +1161,7 @@ config CRYPTO_AES_NI_INTEL
In addition to AES cipher algorithm support, the acceleration
for some popular block cipher mode is supported too, including
ECB, CBC, LRW, XTS. The 64 bit version has additional
acceleration for CTR.
acceleration for CTR and XCTR.
config CRYPTO_AES_SPARC64
tristate "AES cipher algorithms (SPARC64)"

View File

@ -84,7 +84,6 @@ obj-$(CONFIG_CRYPTO_STREEBOG) += streebog_generic.o
obj-$(CONFIG_CRYPTO_WP512) += wp512.o
CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
obj-$(CONFIG_CRYPTO_BLAKE2B) += blake2b_generic.o
obj-$(CONFIG_CRYPTO_BLAKE2S) += blake2s_generic.o
obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o
obj-$(CONFIG_CRYPTO_ECB) += ecb.o
obj-$(CONFIG_CRYPTO_CBC) += cbc.o
@ -94,6 +93,8 @@ obj-$(CONFIG_CRYPTO_CTS) += cts.o
obj-$(CONFIG_CRYPTO_LRW) += lrw.o
obj-$(CONFIG_CRYPTO_XTS) += xts.o
obj-$(CONFIG_CRYPTO_CTR) += ctr.o
obj-$(CONFIG_CRYPTO_XCTR) += xctr.o
obj-$(CONFIG_CRYPTO_HCTR2) += hctr2.o
obj-$(CONFIG_CRYPTO_KEYWRAP) += keywrap.o
obj-$(CONFIG_CRYPTO_ADIANTUM) += adiantum.o
obj-$(CONFIG_CRYPTO_NHPOLY1305) += nhpoly1305.o
@ -171,6 +172,7 @@ UBSAN_SANITIZE_jitterentropy.o = n
jitterentropy_rng-y := jitterentropy.o jitterentropy-kcapi.o
obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o
obj-$(CONFIG_CRYPTO_GHASH) += ghash-generic.o
obj-$(CONFIG_CRYPTO_POLYVAL) += polyval-generic.o
obj-$(CONFIG_CRYPTO_USER_API) += af_alg.o
obj-$(CONFIG_CRYPTO_USER_API_HASH) += algif_hash.o
obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o

View File

@ -1,75 +0,0 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* shash interface to the generic implementation of BLAKE2s
*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include <crypto/internal/blake2s.h>
#include <crypto/internal/hash.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
static int crypto_blake2s_update_generic(struct shash_desc *desc,
const u8 *in, unsigned int inlen)
{
return crypto_blake2s_update(desc, in, inlen, true);
}
static int crypto_blake2s_final_generic(struct shash_desc *desc, u8 *out)
{
return crypto_blake2s_final(desc, out, true);
}
#define BLAKE2S_ALG(name, driver_name, digest_size) \
{ \
.base.cra_name = name, \
.base.cra_driver_name = driver_name, \
.base.cra_priority = 100, \
.base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \
.base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \
.base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \
.base.cra_module = THIS_MODULE, \
.digestsize = digest_size, \
.setkey = crypto_blake2s_setkey, \
.init = crypto_blake2s_init, \
.update = crypto_blake2s_update_generic, \
.final = crypto_blake2s_final_generic, \
.descsize = sizeof(struct blake2s_state), \
}
static struct shash_alg blake2s_algs[] = {
BLAKE2S_ALG("blake2s-128", "blake2s-128-generic",
BLAKE2S_128_HASH_SIZE),
BLAKE2S_ALG("blake2s-160", "blake2s-160-generic",
BLAKE2S_160_HASH_SIZE),
BLAKE2S_ALG("blake2s-224", "blake2s-224-generic",
BLAKE2S_224_HASH_SIZE),
BLAKE2S_ALG("blake2s-256", "blake2s-256-generic",
BLAKE2S_256_HASH_SIZE),
};
static int __init blake2s_mod_init(void)
{
return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
}
static void __exit blake2s_mod_exit(void)
{
crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
}
subsys_initcall(blake2s_mod_init);
module_exit(blake2s_mod_exit);
MODULE_ALIAS_CRYPTO("blake2s-128");
MODULE_ALIAS_CRYPTO("blake2s-128-generic");
MODULE_ALIAS_CRYPTO("blake2s-160");
MODULE_ALIAS_CRYPTO("blake2s-160-generic");
MODULE_ALIAS_CRYPTO("blake2s-224");
MODULE_ALIAS_CRYPTO("blake2s-224-generic");
MODULE_ALIAS_CRYPTO("blake2s-256");
MODULE_ALIAS_CRYPTO("blake2s-256-generic");
MODULE_LICENSE("GPL v2");

581
crypto/hctr2.c Normal file
View File

@ -0,0 +1,581 @@
// SPDX-License-Identifier: GPL-2.0
/*
* HCTR2 length-preserving encryption mode
*
* Copyright 2021 Google LLC
*/
/*
* HCTR2 is a length-preserving encryption mode that is efficient on
* processors with instructions to accelerate AES and carryless
* multiplication, e.g. x86 processors with AES-NI and CLMUL, and ARM
* processors with the ARMv8 crypto extensions.
*
* For more details, see the paper: "Length-preserving encryption with HCTR2"
* (https://eprint.iacr.org/2021/1441.pdf)
*/
#include <crypto/internal/cipher.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
#include <crypto/polyval.h>
#include <crypto/scatterwalk.h>
#include <linux/module.h>
#define BLOCKCIPHER_BLOCK_SIZE 16
/*
* The specification allows variable-length tweaks, but Linux's crypto API
* currently only allows algorithms to support a single length. The "natural"
* tweak length for HCTR2 is 16, since that fits into one POLYVAL block for
* the best performance. But longer tweaks are useful for fscrypt, to avoid
* needing to derive per-file keys. So instead we use two blocks, or 32 bytes.
*/
#define TWEAK_SIZE 32
struct hctr2_instance_ctx {
struct crypto_cipher_spawn blockcipher_spawn;
struct crypto_skcipher_spawn xctr_spawn;
struct crypto_shash_spawn polyval_spawn;
};
struct hctr2_tfm_ctx {
struct crypto_cipher *blockcipher;
struct crypto_skcipher *xctr;
struct crypto_shash *polyval;
u8 L[BLOCKCIPHER_BLOCK_SIZE];
int hashed_tweak_offset;
/*
* This struct is allocated with extra space for two exported hash
* states. Since the hash state size is not known at compile-time, we
* can't add these to the struct directly.
*
* hashed_tweaklen_divisible;
* hashed_tweaklen_remainder;
*/
};
struct hctr2_request_ctx {
u8 first_block[BLOCKCIPHER_BLOCK_SIZE];
u8 xctr_iv[BLOCKCIPHER_BLOCK_SIZE];
struct scatterlist *bulk_part_dst;
struct scatterlist *bulk_part_src;
struct scatterlist sg_src[2];
struct scatterlist sg_dst[2];
/*
* Sub-request sizes are unknown at compile-time, so they need to go
* after the members with known sizes.
*/
union {
struct shash_desc hash_desc;
struct skcipher_request xctr_req;
} u;
/*
* This struct is allocated with extra space for one exported hash
* state. Since the hash state size is not known at compile-time, we
* can't add it to the struct directly.
*
* hashed_tweak;
*/
};
static inline u8 *hctr2_hashed_tweaklen(const struct hctr2_tfm_ctx *tctx,
bool has_remainder)
{
u8 *p = (u8 *)tctx + sizeof(*tctx);
if (has_remainder) /* For messages not a multiple of block length */
p += crypto_shash_statesize(tctx->polyval);
return p;
}
static inline u8 *hctr2_hashed_tweak(const struct hctr2_tfm_ctx *tctx,
struct hctr2_request_ctx *rctx)
{
return (u8 *)rctx + tctx->hashed_tweak_offset;
}
/*
* The input data for each HCTR2 hash step begins with a 16-byte block that
* contains the tweak length and a flag that indicates whether the input is evenly
* divisible into blocks. Since this implementation only supports one tweak
* length, we precompute the two hash states resulting from hashing the two
* possible values of this initial block. This saves one block of hashing for
* each encryption/decryption request.
*
* These precomputed hashes are stored in hctr2_tfm_ctx.
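*
* For example, with TWEAK_SIZE == 32 the initial block encodes the value
* TWEAK_SIZE * 8 * 2 + 2 + has_remainder = 514 or 515 as a little-endian
* 128-bit integer (see hctr2_hash_tweaklen() below).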
*/
static int hctr2_hash_tweaklen(struct hctr2_tfm_ctx *tctx, bool has_remainder)
{
SHASH_DESC_ON_STACK(shash, tctx->polyval);
__le64 tweak_length_block[2];
int err;
shash->tfm = tctx->polyval;
memset(tweak_length_block, 0, sizeof(tweak_length_block));
tweak_length_block[0] = cpu_to_le64(TWEAK_SIZE * 8 * 2 + 2 + has_remainder);
err = crypto_shash_init(shash);
if (err)
return err;
err = crypto_shash_update(shash, (u8 *)tweak_length_block,
POLYVAL_BLOCK_SIZE);
if (err)
return err;
return crypto_shash_export(shash, hctr2_hashed_tweaklen(tctx, has_remainder));
}
static int hctr2_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen)
{
struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
u8 hbar[BLOCKCIPHER_BLOCK_SIZE];
int err;
crypto_cipher_clear_flags(tctx->blockcipher, CRYPTO_TFM_REQ_MASK);
crypto_cipher_set_flags(tctx->blockcipher,
crypto_skcipher_get_flags(tfm) &
CRYPTO_TFM_REQ_MASK);
err = crypto_cipher_setkey(tctx->blockcipher, key, keylen);
if (err)
return err;
crypto_skcipher_clear_flags(tctx->xctr, CRYPTO_TFM_REQ_MASK);
crypto_skcipher_set_flags(tctx->xctr,
crypto_skcipher_get_flags(tfm) &
CRYPTO_TFM_REQ_MASK);
err = crypto_skcipher_setkey(tctx->xctr, key, keylen);
if (err)
return err;
memset(hbar, 0, sizeof(hbar));
crypto_cipher_encrypt_one(tctx->blockcipher, hbar, hbar);
memset(tctx->L, 0, sizeof(tctx->L));
tctx->L[0] = 0x01;
crypto_cipher_encrypt_one(tctx->blockcipher, tctx->L, tctx->L);
crypto_shash_clear_flags(tctx->polyval, CRYPTO_TFM_REQ_MASK);
crypto_shash_set_flags(tctx->polyval, crypto_skcipher_get_flags(tfm) &
CRYPTO_TFM_REQ_MASK);
err = crypto_shash_setkey(tctx->polyval, hbar, BLOCKCIPHER_BLOCK_SIZE);
if (err)
return err;
memzero_explicit(hbar, sizeof(hbar));
return hctr2_hash_tweaklen(tctx, true) ?: hctr2_hash_tweaklen(tctx, false);
}
static int hctr2_hash_tweak(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
const struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
struct hctr2_request_ctx *rctx = skcipher_request_ctx(req);
struct shash_desc *hash_desc = &rctx->u.hash_desc;
int err;
bool has_remainder = req->cryptlen % POLYVAL_BLOCK_SIZE;
hash_desc->tfm = tctx->polyval;
err = crypto_shash_import(hash_desc, hctr2_hashed_tweaklen(tctx, has_remainder));
if (err)
return err;
err = crypto_shash_update(hash_desc, req->iv, TWEAK_SIZE);
if (err)
return err;
// Store the hashed tweak, since we need it when computing both
// H(T || N) and H(T || V).
return crypto_shash_export(hash_desc, hctr2_hashed_tweak(tctx, rctx));
}
static int hctr2_hash_message(struct skcipher_request *req,
struct scatterlist *sgl,
u8 digest[POLYVAL_DIGEST_SIZE])
{
static const u8 padding[BLOCKCIPHER_BLOCK_SIZE] = { 0x1 };
struct hctr2_request_ctx *rctx = skcipher_request_ctx(req);
struct shash_desc *hash_desc = &rctx->u.hash_desc;
const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE;
struct sg_mapping_iter miter;
unsigned int remainder = bulk_len % BLOCKCIPHER_BLOCK_SIZE;
int i;
int err = 0;
int n = 0;
sg_miter_start(&miter, sgl, sg_nents(sgl),
SG_MITER_FROM_SG | SG_MITER_ATOMIC);
for (i = 0; i < bulk_len; i += n) {
sg_miter_next(&miter);
n = min_t(unsigned int, miter.length, bulk_len - i);
err = crypto_shash_update(hash_desc, miter.addr, n);
if (err)
break;
}
sg_miter_stop(&miter);
if (err)
return err;
if (remainder) {
err = crypto_shash_update(hash_desc, padding,
BLOCKCIPHER_BLOCK_SIZE - remainder);
if (err)
return err;
}
return crypto_shash_final(hash_desc, digest);
}
static int hctr2_finish(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
const struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
struct hctr2_request_ctx *rctx = skcipher_request_ctx(req);
u8 digest[POLYVAL_DIGEST_SIZE];
struct shash_desc *hash_desc = &rctx->u.hash_desc;
int err;
// U = UU ^ H(T || V)
// or M = MM ^ H(T || N)
hash_desc->tfm = tctx->polyval;
err = crypto_shash_import(hash_desc, hctr2_hashed_tweak(tctx, rctx));
if (err)
return err;
err = hctr2_hash_message(req, rctx->bulk_part_dst, digest);
if (err)
return err;
crypto_xor(rctx->first_block, digest, BLOCKCIPHER_BLOCK_SIZE);
// Copy U (or M) into dst scatterlist
scatterwalk_map_and_copy(rctx->first_block, req->dst,
0, BLOCKCIPHER_BLOCK_SIZE, 1);
return 0;
}
static void hctr2_xctr_done(struct crypto_async_request *areq,
int err)
{
struct skcipher_request *req = areq->data;
if (!err)
err = hctr2_finish(req);
skcipher_request_complete(req, err);
}
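/*
 * Top-level HCTR2 transform. For encryption the input is split into a single
 * block M and the bulk N, and the steps (in the paper's notation, mirrored by
 * the comments below) are:
 *
 *	MM = M ^ H(T || N)
 *	UU = E(MM)
 *	S  = MM ^ UU ^ L
 *	V  = XCTR_S(N)
 *	U  = UU ^ H(T || V)
 *
 * giving the output U || V. Decryption runs the same sequence with the block
 * cipher in the decrypt direction and the roles of (M, N) and (U, V) swapped.
 */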
static int hctr2_crypt(struct skcipher_request *req, bool enc)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
const struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
struct hctr2_request_ctx *rctx = skcipher_request_ctx(req);
u8 digest[POLYVAL_DIGEST_SIZE];
int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE;
int err;
// Requests must be at least one block
if (req->cryptlen < BLOCKCIPHER_BLOCK_SIZE)
return -EINVAL;
// Copy M (or U) into a temporary buffer
scatterwalk_map_and_copy(rctx->first_block, req->src,
0, BLOCKCIPHER_BLOCK_SIZE, 0);
// Create scatterlists for N and V
rctx->bulk_part_src = scatterwalk_ffwd(rctx->sg_src, req->src,
BLOCKCIPHER_BLOCK_SIZE);
rctx->bulk_part_dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
BLOCKCIPHER_BLOCK_SIZE);
// MM = M ^ H(T || N)
// or UU = U ^ H(T || V)
err = hctr2_hash_tweak(req);
if (err)
return err;
err = hctr2_hash_message(req, rctx->bulk_part_src, digest);
if (err)
return err;
crypto_xor(digest, rctx->first_block, BLOCKCIPHER_BLOCK_SIZE);
// UU = E(MM)
// or MM = D(UU)
if (enc)
crypto_cipher_encrypt_one(tctx->blockcipher, rctx->first_block,
digest);
else
crypto_cipher_decrypt_one(tctx->blockcipher, rctx->first_block,
digest);
// S = MM ^ UU ^ L
crypto_xor(digest, rctx->first_block, BLOCKCIPHER_BLOCK_SIZE);
crypto_xor_cpy(rctx->xctr_iv, digest, tctx->L, BLOCKCIPHER_BLOCK_SIZE);
// V = XCTR(S, N)
// or N = XCTR(S, V)
skcipher_request_set_tfm(&rctx->u.xctr_req, tctx->xctr);
skcipher_request_set_crypt(&rctx->u.xctr_req, rctx->bulk_part_src,
rctx->bulk_part_dst, bulk_len,
rctx->xctr_iv);
skcipher_request_set_callback(&rctx->u.xctr_req,
req->base.flags,
hctr2_xctr_done, req);
return crypto_skcipher_encrypt(&rctx->u.xctr_req) ?:
hctr2_finish(req);
}
static int hctr2_encrypt(struct skcipher_request *req)
{
return hctr2_crypt(req, true);
}
static int hctr2_decrypt(struct skcipher_request *req)
{
return hctr2_crypt(req, false);
}
static int hctr2_init_tfm(struct crypto_skcipher *tfm)
{
struct skcipher_instance *inst = skcipher_alg_instance(tfm);
struct hctr2_instance_ctx *ictx = skcipher_instance_ctx(inst);
struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
struct crypto_skcipher *xctr;
struct crypto_cipher *blockcipher;
struct crypto_shash *polyval;
unsigned int subreq_size;
int err;
xctr = crypto_spawn_skcipher(&ictx->xctr_spawn);
if (IS_ERR(xctr))
return PTR_ERR(xctr);
blockcipher = crypto_spawn_cipher(&ictx->blockcipher_spawn);
if (IS_ERR(blockcipher)) {
err = PTR_ERR(blockcipher);
goto err_free_xctr;
}
polyval = crypto_spawn_shash(&ictx->polyval_spawn);
if (IS_ERR(polyval)) {
err = PTR_ERR(polyval);
goto err_free_blockcipher;
}
tctx->xctr = xctr;
tctx->blockcipher = blockcipher;
tctx->polyval = polyval;
BUILD_BUG_ON(offsetofend(struct hctr2_request_ctx, u) !=
sizeof(struct hctr2_request_ctx));
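/*
 * The request context is laid out as: the fixed fields, then the larger of
 * the two sub-requests (hash descriptor or XCTR request), then the exported
 * hash state holding the hashed tweak.
 */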
subreq_size = max(sizeof_field(struct hctr2_request_ctx, u.hash_desc) +
crypto_shash_descsize(polyval),
sizeof_field(struct hctr2_request_ctx, u.xctr_req) +
crypto_skcipher_reqsize(xctr));
tctx->hashed_tweak_offset = offsetof(struct hctr2_request_ctx, u) +
subreq_size;
crypto_skcipher_set_reqsize(tfm, tctx->hashed_tweak_offset +
crypto_shash_statesize(polyval));
return 0;
err_free_blockcipher:
crypto_free_cipher(blockcipher);
err_free_xctr:
crypto_free_skcipher(xctr);
return err;
}
static void hctr2_exit_tfm(struct crypto_skcipher *tfm)
{
struct hctr2_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
crypto_free_cipher(tctx->blockcipher);
crypto_free_skcipher(tctx->xctr);
crypto_free_shash(tctx->polyval);
}
static void hctr2_free_instance(struct skcipher_instance *inst)
{
struct hctr2_instance_ctx *ictx = skcipher_instance_ctx(inst);
crypto_drop_cipher(&ictx->blockcipher_spawn);
crypto_drop_skcipher(&ictx->xctr_spawn);
crypto_drop_shash(&ictx->polyval_spawn);
kfree(inst);
}
static int hctr2_create_common(struct crypto_template *tmpl,
struct rtattr **tb,
const char *xctr_name,
const char *polyval_name)
{
u32 mask;
struct skcipher_instance *inst;
struct hctr2_instance_ctx *ictx;
struct skcipher_alg *xctr_alg;
struct crypto_alg *blockcipher_alg;
struct shash_alg *polyval_alg;
char blockcipher_name[CRYPTO_MAX_ALG_NAME];
int len;
int err;
err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER, &mask);
if (err)
return err;
inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL);
if (!inst)
return -ENOMEM;
ictx = skcipher_instance_ctx(inst);
/* Stream cipher, xctr(block_cipher) */
err = crypto_grab_skcipher(&ictx->xctr_spawn,
skcipher_crypto_instance(inst),
xctr_name, 0, mask);
if (err)
goto err_free_inst;
xctr_alg = crypto_spawn_skcipher_alg(&ictx->xctr_spawn);
err = -EINVAL;
if (strncmp(xctr_alg->base.cra_name, "xctr(", 5))
goto err_free_inst;
len = strscpy(blockcipher_name, xctr_alg->base.cra_name + 5,
sizeof(blockcipher_name));
if (len < 1)
goto err_free_inst;
if (blockcipher_name[len - 1] != ')')
goto err_free_inst;
blockcipher_name[len - 1] = 0;
/* Block cipher, e.g. "aes" */
err = crypto_grab_cipher(&ictx->blockcipher_spawn,
skcipher_crypto_instance(inst),
blockcipher_name, 0, mask);
if (err)
goto err_free_inst;
blockcipher_alg = crypto_spawn_cipher_alg(&ictx->blockcipher_spawn);
/* Require blocksize of 16 bytes */
err = -EINVAL;
if (blockcipher_alg->cra_blocksize != BLOCKCIPHER_BLOCK_SIZE)
goto err_free_inst;
/* Polyval ε-∆U hash function */
err = crypto_grab_shash(&ictx->polyval_spawn,
skcipher_crypto_instance(inst),
polyval_name, 0, mask);
if (err)
goto err_free_inst;
polyval_alg = crypto_spawn_shash_alg(&ictx->polyval_spawn);
/* Ensure Polyval is being used */
err = -EINVAL;
if (strcmp(polyval_alg->base.cra_name, "polyval") != 0)
goto err_free_inst;
/* Instance fields */
err = -ENAMETOOLONG;
if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "hctr2(%s)",
blockcipher_alg->cra_name) >= CRYPTO_MAX_ALG_NAME)
goto err_free_inst;
if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
"hctr2_base(%s,%s)",
xctr_alg->base.cra_driver_name,
polyval_alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
goto err_free_inst;
inst->alg.base.cra_blocksize = BLOCKCIPHER_BLOCK_SIZE;
inst->alg.base.cra_ctxsize = sizeof(struct hctr2_tfm_ctx) +
polyval_alg->statesize * 2;
inst->alg.base.cra_alignmask = xctr_alg->base.cra_alignmask |
polyval_alg->base.cra_alignmask;
/*
* The hash function is called twice, so it is weighted higher than the
* xctr and blockcipher.
*/
inst->alg.base.cra_priority = (2 * xctr_alg->base.cra_priority +
4 * polyval_alg->base.cra_priority +
blockcipher_alg->cra_priority) / 7;
inst->alg.setkey = hctr2_setkey;
inst->alg.encrypt = hctr2_encrypt;
inst->alg.decrypt = hctr2_decrypt;
inst->alg.init = hctr2_init_tfm;
inst->alg.exit = hctr2_exit_tfm;
inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(xctr_alg);
inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(xctr_alg);
inst->alg.ivsize = TWEAK_SIZE;
inst->free = hctr2_free_instance;
err = skcipher_register_instance(tmpl, inst);
if (err) {
err_free_inst:
hctr2_free_instance(inst);
}
return err;
}
static int hctr2_create_base(struct crypto_template *tmpl, struct rtattr **tb)
{
const char *xctr_name;
const char *polyval_name;
xctr_name = crypto_attr_alg_name(tb[1]);
if (IS_ERR(xctr_name))
return PTR_ERR(xctr_name);
polyval_name = crypto_attr_alg_name(tb[2]);
if (IS_ERR(polyval_name))
return PTR_ERR(polyval_name);
return hctr2_create_common(tmpl, tb, xctr_name, polyval_name);
}
static int hctr2_create(struct crypto_template *tmpl, struct rtattr **tb)
{
const char *blockcipher_name;
char xctr_name[CRYPTO_MAX_ALG_NAME];
blockcipher_name = crypto_attr_alg_name(tb[1]);
if (IS_ERR(blockcipher_name))
return PTR_ERR(blockcipher_name);
if (snprintf(xctr_name, CRYPTO_MAX_ALG_NAME, "xctr(%s)",
blockcipher_name) >= CRYPTO_MAX_ALG_NAME)
return -ENAMETOOLONG;
return hctr2_create_common(tmpl, tb, xctr_name, "polyval");
}
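/*
 * So, for example, instantiating "hctr2(aes)" is equivalent to instantiating
 * "hctr2_base(xctr(aes),polyval)".
 */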
static struct crypto_template hctr2_tmpls[] = {
{
/* hctr2_base(xctr_name, polyval_name) */
.name = "hctr2_base",
.create = hctr2_create_base,
.module = THIS_MODULE,
}, {
/* hctr2(blockcipher_name) */
.name = "hctr2",
.create = hctr2_create,
.module = THIS_MODULE,
}
};
static int __init hctr2_module_init(void)
{
return crypto_register_templates(hctr2_tmpls, ARRAY_SIZE(hctr2_tmpls));
}
static void __exit hctr2_module_exit(void)
{
return crypto_unregister_templates(hctr2_tmpls,
ARRAY_SIZE(hctr2_tmpls));
}
subsys_initcall(hctr2_module_init);
module_exit(hctr2_module_exit);
MODULE_DESCRIPTION("HCTR2 length-preserving encryption mode");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("hctr2");
MODULE_IMPORT_NS(CRYPTO_INTERNAL);

245
crypto/polyval-generic.c Normal file
View File

@ -0,0 +1,245 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* POLYVAL: hash function for HCTR2.
*
* Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen <mh1@iki.fi>
* Copyright (c) 2009 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
* Copyright 2021 Google LLC
*/
/*
* Code based on crypto/ghash-generic.c
*
* POLYVAL is a keyed hash function similar to GHASH. POLYVAL uses a different
* modulus for finite field multiplication which makes hardware accelerated
* implementations on little-endian machines faster. POLYVAL is used in the
* kernel to implement HCTR2, but was originally specified for AES-GCM-SIV
* (RFC 8452).
*
* For more information see:
* Length-preserving encryption with HCTR2:
* https://eprint.iacr.org/2021/1441.pdf
* AES-GCM-SIV: Nonce Misuse-Resistant Authenticated Encryption:
* https://datatracker.ietf.org/doc/html/rfc8452
*
* Like GHASH, POLYVAL is not a cryptographic hash function and should
* not be used outside of crypto modes explicitly designed to use POLYVAL.
*
* This implementation uses a convenient trick involving the GHASH and POLYVAL
* fields. This trick allows multiplication in the POLYVAL field to be
* implemented by using multiplication in the GHASH field as a subroutine. An
* element of the POLYVAL field can be converted to an element of the GHASH
* field by computing x*REVERSE(a), where REVERSE reverses the byte-ordering of
* a. Similarly, an element of the GHASH field can be converted back to the
* POLYVAL field by computing REVERSE(x^{-1}*a). For more information, see:
* https://datatracker.ietf.org/doc/html/rfc8452#appendix-A
*
* By using this trick, we do not need to implement the POLYVAL field for the
* generic implementation.
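*
* Concretely (RFC 8452, Appendix A):
*   POLYVAL(H, X_1, ..., X_n) =
*       ByteReverse(GHASH(mulX_GHASH(ByteReverse(H)),
*                         ByteReverse(X_1), ..., ByteReverse(X_n)))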
*
* Warning: this generic implementation is not intended to be used in practice
* and is not constant time. For practical use, a hardware accelerated
* implementation of POLYVAL should be used instead.
*
*/
#include <asm/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/gf128mul.h>
#include <crypto/polyval.h>
#include <crypto/internal/hash.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
struct polyval_tfm_ctx {
struct gf128mul_4k *gf128;
};
struct polyval_desc_ctx {
union {
u8 buffer[POLYVAL_BLOCK_SIZE];
be128 buffer128;
};
u32 bytes;
};
static void copy_and_reverse(u8 dst[POLYVAL_BLOCK_SIZE],
const u8 src[POLYVAL_BLOCK_SIZE])
{
u64 a = get_unaligned((const u64 *)&src[0]);
u64 b = get_unaligned((const u64 *)&src[8]);
put_unaligned(swab64(a), (u64 *)&dst[8]);
put_unaligned(swab64(b), (u64 *)&dst[0]);
}
/*
* Performs multiplication in the POLYVAL field using the GHASH field as a
* subroutine. This function is used as a fallback for hardware accelerated
* implementations when simd registers are unavailable.
*
* Note: This function is not used for polyval-generic, instead we use the 4k
* lookup table implementation for finite field multiplication.
*/
void polyval_mul_non4k(u8 *op1, const u8 *op2)
{
be128 a, b;
// Assume one argument is in Montgomery form and one is not.
copy_and_reverse((u8 *)&a, op1);
copy_and_reverse((u8 *)&b, op2);
gf128mul_x_lle(&a, &a);
gf128mul_lle(&a, &b);
copy_and_reverse(op1, (u8 *)&a);
}
EXPORT_SYMBOL_GPL(polyval_mul_non4k);
/*
* Perform a POLYVAL update using non4k multiplication. This function is used
* as a fallback for hardware accelerated implementations when simd registers
* are unavailable.
*
* Note: This function is not used for polyval-generic, instead we use the 4k
* lookup table implementation of finite field multiplication.
*/
void polyval_update_non4k(const u8 *key, const u8 *in,
size_t nblocks, u8 *accumulator)
{
while (nblocks--) {
crypto_xor(accumulator, in, POLYVAL_BLOCK_SIZE);
polyval_mul_non4k(accumulator, key);
in += POLYVAL_BLOCK_SIZE;
}
}
EXPORT_SYMBOL_GPL(polyval_update_non4k);
static int polyval_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
struct polyval_tfm_ctx *ctx = crypto_shash_ctx(tfm);
be128 k;
if (keylen != POLYVAL_BLOCK_SIZE)
return -EINVAL;
gf128mul_free_4k(ctx->gf128);
BUILD_BUG_ON(sizeof(k) != POLYVAL_BLOCK_SIZE);
copy_and_reverse((u8 *)&k, key);
gf128mul_x_lle(&k, &k);
ctx->gf128 = gf128mul_init_4k_lle(&k);
memzero_explicit(&k, POLYVAL_BLOCK_SIZE);
if (!ctx->gf128)
return -ENOMEM;
return 0;
}
static int polyval_init(struct shash_desc *desc)
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
memset(dctx, 0, sizeof(*dctx));
return 0;
}
static int polyval_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
const struct polyval_tfm_ctx *ctx = crypto_shash_ctx(desc->tfm);
u8 *pos;
u8 tmp[POLYVAL_BLOCK_SIZE];
int n;
if (dctx->bytes) {
n = min(srclen, dctx->bytes);
pos = dctx->buffer + dctx->bytes - 1;
dctx->bytes -= n;
srclen -= n;
while (n--)
*pos-- ^= *src++;
if (!dctx->bytes)
gf128mul_4k_lle(&dctx->buffer128, ctx->gf128);
}
while (srclen >= POLYVAL_BLOCK_SIZE) {
copy_and_reverse(tmp, src);
crypto_xor(dctx->buffer, tmp, POLYVAL_BLOCK_SIZE);
gf128mul_4k_lle(&dctx->buffer128, ctx->gf128);
src += POLYVAL_BLOCK_SIZE;
srclen -= POLYVAL_BLOCK_SIZE;
}
if (srclen) {
dctx->bytes = POLYVAL_BLOCK_SIZE - srclen;
pos = dctx->buffer + POLYVAL_BLOCK_SIZE - 1;
while (srclen--)
*pos-- ^= *src++;
}
return 0;
}
static int polyval_final(struct shash_desc *desc, u8 *dst)
{
struct polyval_desc_ctx *dctx = shash_desc_ctx(desc);
const struct polyval_tfm_ctx *ctx = crypto_shash_ctx(desc->tfm);
if (dctx->bytes)
gf128mul_4k_lle(&dctx->buffer128, ctx->gf128);
copy_and_reverse(dst, dctx->buffer);
return 0;
}
static void polyval_exit_tfm(struct crypto_tfm *tfm)
{
struct polyval_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
gf128mul_free_4k(ctx->gf128);
}
static struct shash_alg polyval_alg = {
.digestsize = POLYVAL_DIGEST_SIZE,
.init = polyval_init,
.update = polyval_update,
.final = polyval_final,
.setkey = polyval_setkey,
.descsize = sizeof(struct polyval_desc_ctx),
.base = {
.cra_name = "polyval",
.cra_driver_name = "polyval-generic",
.cra_priority = 100,
.cra_blocksize = POLYVAL_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct polyval_tfm_ctx),
.cra_module = THIS_MODULE,
.cra_exit = polyval_exit_tfm,
},
};
static int __init polyval_mod_init(void)
{
return crypto_register_shash(&polyval_alg);
}
static void __exit polyval_mod_exit(void)
{
crypto_unregister_shash(&polyval_alg);
}
subsys_initcall(polyval_mod_init);
module_exit(polyval_mod_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("POLYVAL hash function");
MODULE_ALIAS_CRYPTO("polyval");
MODULE_ALIAS_CRYPTO("polyval-generic");

View File

@ -17,6 +17,11 @@ struct rsa_mpi_key {
MPI n;
MPI e;
MPI d;
MPI p;
MPI q;
MPI dp;
MPI dq;
MPI qinv;
};
/*
@ -35,16 +40,49 @@ static int _rsa_enc(const struct rsa_mpi_key *key, MPI c, MPI m)
/*
* RSADP function [RFC3447 sec 5.1.2]
* m = c^d mod n;
* m_1 = c^dP mod p;
* m_2 = c^dQ mod q;
* h = (m_1 - m_2) * qInv mod p;
* m = m_2 + q * h;
*/
static int _rsa_dec(const struct rsa_mpi_key *key, MPI m, MPI c)
static int _rsa_dec_crt(const struct rsa_mpi_key *key, MPI m_or_m1_or_h, MPI c)
{
MPI m2, m12_or_qh;
int ret = -ENOMEM;
/* (1) Validate 0 <= c < n */
if (mpi_cmp_ui(c, 0) < 0 || mpi_cmp(c, key->n) >= 0)
return -EINVAL;
/* (2) m = c^d mod n */
return mpi_powm(m, c, key->d, key->n);
m2 = mpi_alloc(0);
m12_or_qh = mpi_alloc(0);
if (!m2 || !m12_or_qh)
goto err_free_mpi;
/* (2i) m_1 = c^dP mod p */
ret = mpi_powm(m_or_m1_or_h, c, key->dp, key->p);
if (ret)
goto err_free_mpi;
/* (2i) m_2 = c^dQ mod q */
ret = mpi_powm(m2, c, key->dq, key->q);
if (ret)
goto err_free_mpi;
/* (2iii) h = (m_1 - m_2) * qInv mod p */
mpi_sub(m12_or_qh, m_or_m1_or_h, m2);
mpi_mulm(m_or_m1_or_h, m12_or_qh, key->qinv, key->p);
/* (2iv) m = m_2 + q * h */
mpi_mul(m12_or_qh, key->q, m_or_m1_or_h);
mpi_addm(m_or_m1_or_h, m2, m12_or_qh, key->n);
ret = 0;
err_free_mpi:
mpi_free(m12_or_qh);
mpi_free(m2);
return ret;
}
static inline struct rsa_mpi_key *rsa_get_key(struct crypto_akcipher *tfm)
@ -112,7 +150,7 @@ static int rsa_dec(struct akcipher_request *req)
if (!c)
goto err_free_m;
ret = _rsa_dec(pkey, m, c);
ret = _rsa_dec_crt(pkey, m, c);
if (ret)
goto err_free_c;
@ -134,9 +172,19 @@ static void rsa_free_mpi_key(struct rsa_mpi_key *key)
mpi_free(key->d);
mpi_free(key->e);
mpi_free(key->n);
mpi_free(key->p);
mpi_free(key->q);
mpi_free(key->dp);
mpi_free(key->dq);
mpi_free(key->qinv);
key->d = NULL;
key->e = NULL;
key->n = NULL;
key->p = NULL;
key->q = NULL;
key->dp = NULL;
key->dq = NULL;
key->qinv = NULL;
}
static int rsa_check_key_length(unsigned int len)
@ -217,6 +265,26 @@ static int rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key,
if (!mpi_key->n)
goto err;
mpi_key->p = mpi_read_raw_data(raw_key.p, raw_key.p_sz);
if (!mpi_key->p)
goto err;
mpi_key->q = mpi_read_raw_data(raw_key.q, raw_key.q_sz);
if (!mpi_key->q)
goto err;
mpi_key->dp = mpi_read_raw_data(raw_key.dp, raw_key.dp_sz);
if (!mpi_key->dp)
goto err;
mpi_key->dq = mpi_read_raw_data(raw_key.dq, raw_key.dq_sz);
if (!mpi_key->dq)
goto err;
mpi_key->qinv = mpi_read_raw_data(raw_key.qinv, raw_key.qinv_sz);
if (!mpi_key->qinv)
goto err;
if (rsa_check_key_length(mpi_get_size(mpi_key->n) << 3)) {
rsa_free_mpi_key(mpi_key);
return -EINVAL;

View File

@ -1556,6 +1556,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
ret += tcrypt_test("rfc3686(ctr(aes))");
ret += tcrypt_test("ofb(aes)");
ret += tcrypt_test("cfb(aes)");
ret += tcrypt_test("xctr(aes)");
break;
case 11:
@ -1669,10 +1670,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
ret += tcrypt_test("rmd160");
break;
case 41:
ret += tcrypt_test("blake2s-256");
break;
case 42:
ret += tcrypt_test("blake2b-512");
break;
@ -1729,6 +1726,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
ret += tcrypt_test("ccm(sm4)");
break;
case 57:
ret += tcrypt_test("polyval");
break;
case 100:
ret += tcrypt_test("hmac(md5)");
break;
@ -2186,6 +2187,11 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
16, 16, aead_speed_template_19, num_mb);
break;
case 226:
test_cipher_speed("hctr2(aes)", ENCRYPT, sec, NULL,
0, speed_template_32);
break;
case 300:
if (alg) {
test_hash_speed(alg, sec, generic_hash_speed_template);
@ -2240,10 +2246,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
test_hash_speed("rmd160", sec, generic_hash_speed_template);
if (mode > 300 && mode < 400) break;
fallthrough;
case 316:
test_hash_speed("blake2s-256", sec, generic_hash_speed_template);
if (mode > 300 && mode < 400) break;
fallthrough;
case 317:
test_hash_speed("blake2b-512", sec, generic_hash_speed_template);
if (mode > 300 && mode < 400) break;
@ -2352,10 +2354,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
test_ahash_speed("rmd160", sec, generic_hash_speed_template);
if (mode > 400 && mode < 500) break;
fallthrough;
case 416:
test_ahash_speed("blake2s-256", sec, generic_hash_speed_template);
if (mode > 400 && mode < 500) break;
fallthrough;
case 417:
test_ahash_speed("blake2b-512", sec, generic_hash_speed_template);
if (mode > 400 && mode < 500) break;

View File

@ -4375,30 +4375,6 @@ static const struct alg_test_desc alg_test_descs[] = {
.suite = {
.hash = __VECS(blake2b_512_tv_template)
}
}, {
.alg = "blake2s-128",
.test = alg_test_hash,
.suite = {
.hash = __VECS(blakes2s_128_tv_template)
}
}, {
.alg = "blake2s-160",
.test = alg_test_hash,
.suite = {
.hash = __VECS(blakes2s_160_tv_template)
}
}, {
.alg = "blake2s-224",
.test = alg_test_hash,
.suite = {
.hash = __VECS(blakes2s_224_tv_template)
}
}, {
.alg = "blake2s-256",
.test = alg_test_hash,
.suite = {
.hash = __VECS(blakes2s_256_tv_template)
}
}, {
.alg = "cbc(aes)",
.test = alg_test_skcipher,
@ -5088,6 +5064,14 @@ static const struct alg_test_desc alg_test_descs[] = {
.suite = {
.hash = __VECS(ghash_tv_template)
}
}, {
.alg = "hctr2(aes)",
.generic_driver =
"hctr2_base(xctr(aes-generic),polyval-generic)",
.test = alg_test_skcipher,
.suite = {
.cipher = __VECS(aes_hctr2_tv_template)
}
}, {
.alg = "hmac(md5)",
.test = alg_test_hash,
@ -5342,6 +5326,12 @@ static const struct alg_test_desc alg_test_descs[] = {
.suite = {
.hash = __VECS(poly1305_tv_template)
}
}, {
.alg = "polyval",
.test = alg_test_hash,
.suite = {
.hash = __VECS(polyval_tv_template)
}
}, {
.alg = "rfc3686(ctr(aes))",
.test = alg_test_skcipher,
@ -5548,6 +5538,12 @@ static const struct alg_test_desc alg_test_descs[] = {
.suite = {
.cipher = __VECS(xchacha20_tv_template)
},
}, {
.alg = "xctr(aes)",
.test = alg_test_skcipher,
.suite = {
.cipher = __VECS(aes_xctr_tv_template)
}
}, {
.alg = "xts(aes)",
.generic_driver = "xts(ecb(aes-generic))",

File diff suppressed because it is too large

191
crypto/xctr.c Normal file
View File

@ -0,0 +1,191 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* XCTR: XOR Counter mode - Adapted from ctr.c
*
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
* Copyright 2021 Google LLC
*/
/*
* XCTR mode is a blockcipher mode of operation used to implement HCTR2. XCTR is
* closely related to the CTR mode of operation; the main difference is that CTR
* generates the keystream using E(CTR + IV) whereas XCTR generates the
* keystream using E(CTR ^ IV). This allows implementations to avoid dealing
* with multi-limb integers (as is required in CTR mode). XCTR is also specified
* using little-endian arithmetic which makes it slightly faster on LE machines.
*
* See the HCTR2 paper for more details:
* Length-preserving encryption with HCTR2
* (https://eprint.iacr.org/2021/1441.pdf)
*/
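/*
 * In other words, the i-th keystream block (i = 0, 1, ...) is
 *	E_K(IV ^ (i + 1))
 * with the counter encoded little-endian (kept in the low 32 bits of the
 * block by this implementation), and the ciphertext is the plaintext XORed
 * with that keystream; the final block may be partial.
 */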
#include <crypto/algapi.h>
#include <crypto/internal/cipher.h>
#include <crypto/internal/skcipher.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
/* For now this implementation is limited to 16-byte blocks for simplicity */
#define XCTR_BLOCKSIZE 16
static void crypto_xctr_crypt_final(struct skcipher_walk *walk,
struct crypto_cipher *tfm, u32 byte_ctr)
{
u8 keystream[XCTR_BLOCKSIZE];
const u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
unsigned int nbytes = walk->nbytes;
__le32 ctr32 = cpu_to_le32(byte_ctr / XCTR_BLOCKSIZE + 1);
crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32));
crypto_cipher_encrypt_one(tfm, keystream, walk->iv);
crypto_xor_cpy(dst, keystream, src, nbytes);
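/* Undo the counter XOR so walk->iv still holds the original IV. */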
crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32));
}
static int crypto_xctr_crypt_segment(struct skcipher_walk *walk,
struct crypto_cipher *tfm, u32 byte_ctr)
{
void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
crypto_cipher_alg(tfm)->cia_encrypt;
const u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
unsigned int nbytes = walk->nbytes;
__le32 ctr32 = cpu_to_le32(byte_ctr / XCTR_BLOCKSIZE + 1);
do {
crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32));
fn(crypto_cipher_tfm(tfm), dst, walk->iv);
crypto_xor(dst, src, XCTR_BLOCKSIZE);
crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32));
le32_add_cpu(&ctr32, 1);
src += XCTR_BLOCKSIZE;
dst += XCTR_BLOCKSIZE;
} while ((nbytes -= XCTR_BLOCKSIZE) >= XCTR_BLOCKSIZE);
return nbytes;
}
static int crypto_xctr_crypt_inplace(struct skcipher_walk *walk,
struct crypto_cipher *tfm, u32 byte_ctr)
{
void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
crypto_cipher_alg(tfm)->cia_encrypt;
unsigned long alignmask = crypto_cipher_alignmask(tfm);
unsigned int nbytes = walk->nbytes;
u8 *data = walk->src.virt.addr;
u8 tmp[XCTR_BLOCKSIZE + MAX_CIPHER_ALIGNMASK];
u8 *keystream = PTR_ALIGN(tmp + 0, alignmask + 1);
__le32 ctr32 = cpu_to_le32(byte_ctr / XCTR_BLOCKSIZE + 1);
do {
crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32));
fn(crypto_cipher_tfm(tfm), keystream, walk->iv);
crypto_xor(data, keystream, XCTR_BLOCKSIZE);
crypto_xor(walk->iv, (u8 *)&ctr32, sizeof(ctr32));
le32_add_cpu(&ctr32, 1);
data += XCTR_BLOCKSIZE;
} while ((nbytes -= XCTR_BLOCKSIZE) >= XCTR_BLOCKSIZE);
return nbytes;
}
static int crypto_xctr_crypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_cipher *cipher = skcipher_cipher_simple(tfm);
struct skcipher_walk walk;
unsigned int nbytes;
int err;
u32 byte_ctr = 0;
err = skcipher_walk_virt(&walk, req, false);
while (walk.nbytes >= XCTR_BLOCKSIZE) {
if (walk.src.virt.addr == walk.dst.virt.addr)
nbytes = crypto_xctr_crypt_inplace(&walk, cipher,
byte_ctr);
else
nbytes = crypto_xctr_crypt_segment(&walk, cipher,
byte_ctr);
byte_ctr += walk.nbytes - nbytes;
err = skcipher_walk_done(&walk, nbytes);
}
if (walk.nbytes) {
crypto_xctr_crypt_final(&walk, cipher, byte_ctr);
err = skcipher_walk_done(&walk, 0);
}
return err;
}
static int crypto_xctr_create(struct crypto_template *tmpl, struct rtattr **tb)
{
struct skcipher_instance *inst;
struct crypto_alg *alg;
int err;
inst = skcipher_alloc_instance_simple(tmpl, tb);
if (IS_ERR(inst))
return PTR_ERR(inst);
alg = skcipher_ialg_simple(inst);
/* Block size must be 16 bytes. */
err = -EINVAL;
if (alg->cra_blocksize != XCTR_BLOCKSIZE)
goto out_free_inst;
/* XCTR mode is a stream cipher. */
inst->alg.base.cra_blocksize = 1;
/*
* To simplify the implementation, configure the skcipher walk to only
* give a partial block at the very end, never earlier.
*/
inst->alg.chunksize = alg->cra_blocksize;
inst->alg.encrypt = crypto_xctr_crypt;
inst->alg.decrypt = crypto_xctr_crypt;
err = skcipher_register_instance(tmpl, inst);
if (err) {
out_free_inst:
inst->free(inst);
}
return err;
}
static struct crypto_template crypto_xctr_tmpl = {
.name = "xctr",
.create = crypto_xctr_create,
.module = THIS_MODULE,
};
static int __init crypto_xctr_module_init(void)
{
return crypto_register_template(&crypto_xctr_tmpl);
}
static void __exit crypto_xctr_module_exit(void)
{
crypto_unregister_template(&crypto_xctr_tmpl);
}
subsys_initcall(crypto_xctr_module_init);
module_exit(crypto_xctr_module_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("XCTR block cipher mode of operation");
MODULE_ALIAS_CRYPTO("xctr");
MODULE_IMPORT_NS(CRYPTO_INTERNAL);

View File

@ -170,6 +170,7 @@ static int sun8i_ss_setup_ivs(struct skcipher_request *areq)
while (i >= 0) {
dma_unmap_single(ss->dev, rctx->p_iv[i], ivsize, DMA_TO_DEVICE);
memzero_explicit(sf->iv[i], ivsize);
i--;
}
return err;
}

View File

@ -528,25 +528,33 @@ static int allocate_flows(struct sun8i_ss_dev *ss)
ss->flows[i].biv = devm_kmalloc(ss->dev, AES_BLOCK_SIZE,
GFP_KERNEL | GFP_DMA);
if (!ss->flows[i].biv)
if (!ss->flows[i].biv) {
err = -ENOMEM;
goto error_engine;
}
for (j = 0; j < MAX_SG; j++) {
ss->flows[i].iv[j] = devm_kmalloc(ss->dev, AES_BLOCK_SIZE,
GFP_KERNEL | GFP_DMA);
if (!ss->flows[i].iv[j])
if (!ss->flows[i].iv[j]) {
err = -ENOMEM;
goto error_engine;
}
}
/* the padding could be up to two blocks. */
ss->flows[i].pad = devm_kmalloc(ss->dev, MAX_PAD_SIZE,
GFP_KERNEL | GFP_DMA);
if (!ss->flows[i].pad)
if (!ss->flows[i].pad) {
err = -ENOMEM;
goto error_engine;
}
ss->flows[i].result = devm_kmalloc(ss->dev, SHA256_DIGEST_SIZE,
GFP_KERNEL | GFP_DMA);
if (!ss->flows[i].result)
if (!ss->flows[i].result) {
err = -ENOMEM;
goto error_engine;
}
ss->flows[i].engine = crypto_engine_alloc_init(ss->dev, true);
if (!ss->flows[i].engine) {

View File

@ -30,8 +30,8 @@ static int sun8i_ss_hashkey(struct sun8i_ss_hash_tfm_ctx *tfmctx, const u8 *key,
int ret = 0;
xtfm = crypto_alloc_shash("sha1", 0, CRYPTO_ALG_NEED_FALLBACK);
if (!xtfm)
return -ENOMEM;
if (IS_ERR(xtfm))
return PTR_ERR(xtfm);
len = sizeof(*sdesc) + crypto_shash_descsize(xtfm);
sdesc = kmalloc(len, GFP_KERNEL);
@ -586,7 +586,8 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq)
rctx->t_dst[k + 1].len = rctx->t_dst[k].len;
}
addr_xpad = dma_map_single(ss->dev, tfmctx->ipad, bs, DMA_TO_DEVICE);
if (dma_mapping_error(ss->dev, addr_xpad)) {
err = dma_mapping_error(ss->dev, addr_xpad);
if (err) {
dev_err(ss->dev, "Fail to create DMA mapping of ipad\n");
goto err_dma_xpad;
}
@ -612,7 +613,8 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq)
goto err_dma_result;
}
addr_xpad = dma_map_single(ss->dev, tfmctx->opad, bs, DMA_TO_DEVICE);
if (dma_mapping_error(ss->dev, addr_xpad)) {
err = dma_mapping_error(ss->dev, addr_xpad);
if (err) {
dev_err(ss->dev, "Fail to create DMA mapping of opad\n");
goto err_dma_xpad;
}

View File

@ -349,8 +349,16 @@ static int atmel_ecc_remove(struct i2c_client *client)
/* Return EBUSY if i2c client already allocated. */
if (atomic_read(&i2c_priv->tfm_count)) {
dev_err(&client->dev, "Device is busy\n");
return -EBUSY;
/*
* After we return here, the memory backing the device is freed.
* That happens no matter what the return value of this function
* is because in the Linux device model there is no error
* handling for unbinding a driver.
* If there is still some action pending, it probably involves
* accessing the freed memory.
*/
dev_emerg(&client->dev, "Device is busy, expect memory corruption.\n");
return 0;
}
crypto_unregister_kpp(&atmel_ecdh_nist_p256);

View File

@ -366,7 +366,7 @@ struct ccp_device {
/* Master lists that all cmds are queued on. Because there can be
* more than one CCP command queue that can process a cmd a separate
* backlog list is neeeded so that the backlog completion call
* backlog list is needed so that the backlog completion call
* completes before the cmd is available for execution.
*/
spinlock_t cmd_lock ____cacheline_aligned;

View File

@ -503,7 +503,7 @@ static int __sev_platform_shutdown_locked(int *error)
struct sev_device *sev = psp_master->sev_data;
int ret;
if (sev->state == SEV_STATE_UNINIT)
if (!sev || sev->state == SEV_STATE_UNINIT)
return 0;
ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
@ -577,6 +577,8 @@ static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp)
struct sev_user_data_status data;
int ret;
memset(&data, 0, sizeof(data));
ret = __sev_do_cmd_locked(SEV_CMD_PLATFORM_STATUS, &data, &argp->error);
if (ret)
return ret;
@ -630,7 +632,7 @@ static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp, bool writable)
if (input.length > SEV_FW_BLOB_MAX_SIZE)
return -EFAULT;
blob = kmalloc(input.length, GFP_KERNEL);
blob = kzalloc(input.length, GFP_KERNEL);
if (!blob)
return -ENOMEM;
@ -854,7 +856,7 @@ static int sev_ioctl_do_get_id2(struct sev_issue_cmd *argp)
input_address = (void __user *)input.address;
if (input.address && input.length) {
id_blob = kmalloc(input.length, GFP_KERNEL);
id_blob = kzalloc(input.length, GFP_KERNEL);
if (!id_blob)
return -ENOMEM;
@ -973,14 +975,14 @@ static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp, bool writable)
if (input.cert_chain_len > SEV_FW_BLOB_MAX_SIZE)
return -EFAULT;
pdh_blob = kmalloc(input.pdh_cert_len, GFP_KERNEL);
pdh_blob = kzalloc(input.pdh_cert_len, GFP_KERNEL);
if (!pdh_blob)
return -ENOMEM;
data.pdh_cert_address = __psp_pa(pdh_blob);
data.pdh_cert_len = input.pdh_cert_len;
cert_blob = kmalloc(input.cert_chain_len, GFP_KERNEL);
cert_blob = kzalloc(input.cert_chain_len, GFP_KERNEL);
if (!cert_blob) {
ret = -ENOMEM;
goto e_free_pdh;

View File

@ -877,13 +877,6 @@ static void qm_pm_put_sync(struct hisi_qm *qm)
pm_runtime_put_autosuspend(dev);
}
static struct hisi_qp *qm_to_hisi_qp(struct hisi_qm *qm, struct qm_eqe *eqe)
{
u16 cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
return &qm->qp_array[cqn];
}
static void qm_cq_head_update(struct hisi_qp *qp)
{
if (qp->qp_status.cq_head == QM_Q_DEPTH - 1) {
@ -894,47 +887,37 @@ static void qm_cq_head_update(struct hisi_qp *qp)
}
}
static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm)
static void qm_poll_req_cb(struct hisi_qp *qp)
{
if (unlikely(atomic_read(&qp->qp_status.flags) == QP_STOP))
return;
struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head;
struct hisi_qm *qm = qp->qm;
if (qp->event_cb) {
qp->event_cb(qp);
return;
}
if (qp->req_cb) {
struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head;
while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
dma_rmb();
qp->req_cb(qp, qp->sqe + qm->sqe_size *
le16_to_cpu(cqe->sq_head));
qm_cq_head_update(qp);
cqe = qp->cqe + qp->qp_status.cq_head;
qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ,
qp->qp_status.cq_head, 0);
atomic_dec(&qp->qp_status.used);
}
/* set c_flag */
while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
dma_rmb();
qp->req_cb(qp, qp->sqe + qm->sqe_size *
le16_to_cpu(cqe->sq_head));
qm_cq_head_update(qp);
cqe = qp->cqe + qp->qp_status.cq_head;
qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ,
qp->qp_status.cq_head, 1);
qp->qp_status.cq_head, 0);
atomic_dec(&qp->qp_status.used);
}
/* set c_flag */
qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ, qp->qp_status.cq_head, 1);
}
static void qm_work_process(struct work_struct *work)
static int qm_get_complete_eqe_num(struct hisi_qm_poll_data *poll_data)
{
struct hisi_qm *qm = container_of(work, struct hisi_qm, work);
struct hisi_qm *qm = poll_data->qm;
struct qm_eqe *eqe = qm->eqe + qm->status.eq_head;
struct hisi_qp *qp;
int eqe_num = 0;
u16 cqn;
while (QM_EQE_PHASE(eqe) == qm->status.eqc_phase) {
cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
poll_data->qp_finish_id[eqe_num] = cqn;
eqe_num++;
qp = qm_to_hisi_qp(qm, eqe);
qm_poll_qp(qp, qm);
if (qm->status.eq_head == QM_EQ_DEPTH - 1) {
qm->status.eqc_phase = !qm->status.eqc_phase;
@ -945,37 +928,70 @@ static void qm_work_process(struct work_struct *work)
qm->status.eq_head++;
}
if (eqe_num == QM_EQ_DEPTH / 2 - 1) {
eqe_num = 0;
qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
}
if (eqe_num == (QM_EQ_DEPTH >> 1) - 1)
break;
}
qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
return eqe_num;
}
static irqreturn_t do_qm_irq(int irq, void *data)
static void qm_work_process(struct work_struct *work)
{
struct hisi_qm *qm = (struct hisi_qm *)data;
struct hisi_qm_poll_data *poll_data =
container_of(work, struct hisi_qm_poll_data, work);
struct hisi_qm *qm = poll_data->qm;
struct hisi_qp *qp;
int eqe_num, i;
/* the workqueue created by device driver of QM */
if (qm->wq)
queue_work(qm->wq, &qm->work);
else
schedule_work(&qm->work);
/* Get qp id of completed tasks and re-enable the interrupt. */
eqe_num = qm_get_complete_eqe_num(poll_data);
for (i = eqe_num - 1; i >= 0; i--) {
qp = &qm->qp_array[poll_data->qp_finish_id[i]];
if (unlikely(atomic_read(&qp->qp_status.flags) == QP_STOP))
continue;
return IRQ_HANDLED;
if (qp->event_cb) {
qp->event_cb(qp);
continue;
}
if (likely(qp->req_cb))
qm_poll_req_cb(qp);
}
}
static bool do_qm_irq(struct hisi_qm *qm)
{
struct qm_eqe *eqe = qm->eqe + qm->status.eq_head;
struct hisi_qm_poll_data *poll_data;
u16 cqn;
if (!readl(qm->io_base + QM_VF_EQ_INT_SOURCE))
return false;
if (QM_EQE_PHASE(eqe) == qm->status.eqc_phase) {
cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
poll_data = &qm->poll_data[cqn];
queue_work(qm->wq, &poll_data->work);
return true;
}
return false;
}
static irqreturn_t qm_irq(int irq, void *data)
{
struct hisi_qm *qm = data;
bool ret;
if (readl(qm->io_base + QM_VF_EQ_INT_SOURCE))
return do_qm_irq(irq, data);
ret = do_qm_irq(qm);
if (ret)
return IRQ_HANDLED;
atomic64_inc(&qm->debug.dfx.err_irq_cnt);
dev_err(&qm->pdev->dev, "invalid int source\n");
qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
return IRQ_NONE;
@ -3134,11 +3150,8 @@ static int qm_stop_qp_nolock(struct hisi_qp *qp)
if (ret)
dev_err(dev, "Failed to drain out data for stopping!\n");
if (qp->qm->wq)
flush_workqueue(qp->qm->wq);
else
flush_work(&qp->qm->work);
flush_workqueue(qp->qm->wq);
if (unlikely(qp->is_resetting && atomic_read(&qp->qp_status.used)))
qp_stop_fail_cb(qp);
@ -3557,8 +3570,10 @@ static void hisi_qp_memory_uninit(struct hisi_qm *qm, int num)
for (i = num - 1; i >= 0; i--) {
qdma = &qm->qp_array[i].qdma;
dma_free_coherent(dev, qdma->size, qdma->va, qdma->dma);
kfree(qm->poll_data[i].qp_finish_id);
}
kfree(qm->poll_data);
kfree(qm->qp_array);
}
@ -3567,12 +3582,18 @@ static int hisi_qp_memory_init(struct hisi_qm *qm, size_t dma_size, int id)
struct device *dev = &qm->pdev->dev;
size_t off = qm->sqe_size * QM_Q_DEPTH;
struct hisi_qp *qp;
int ret = -ENOMEM;
qm->poll_data[id].qp_finish_id = kcalloc(qm->qp_num, sizeof(u16),
GFP_KERNEL);
if (!qm->poll_data[id].qp_finish_id)
return -ENOMEM;
qp = &qm->qp_array[id];
qp->qdma.va = dma_alloc_coherent(dev, dma_size, &qp->qdma.dma,
GFP_KERNEL);
if (!qp->qdma.va)
return -ENOMEM;
goto err_free_qp_finish_id;
qp->sqe = qp->qdma.va;
qp->sqe_dma = qp->qdma.dma;
@ -3583,6 +3604,10 @@ static int hisi_qp_memory_init(struct hisi_qm *qm, size_t dma_size, int id)
qp->qp_id = id;
return 0;
err_free_qp_finish_id:
kfree(qm->poll_data[id].qp_finish_id);
return ret;
}
static void hisi_qm_pre_init(struct hisi_qm *qm)
@ -3672,6 +3697,26 @@ static void qm_last_regs_uninit(struct hisi_qm *qm)
debug->qm_last_words = NULL;
}
static void hisi_qm_unint_work(struct hisi_qm *qm)
{
destroy_workqueue(qm->wq);
}
static void hisi_qm_memory_uninit(struct hisi_qm *qm)
{
struct device *dev = &qm->pdev->dev;
hisi_qp_memory_uninit(qm, qm->qp_num);
if (qm->qdma.va) {
hisi_qm_cache_wb(qm);
dma_free_coherent(dev, qm->qdma.size,
qm->qdma.va, qm->qdma.dma);
}
idr_destroy(&qm->qp_idr);
kfree(qm->factor);
}
/**
* hisi_qm_uninit() - Uninitialize qm.
* @qm: The qm needed uninit.
@ -3680,13 +3725,10 @@ static void qm_last_regs_uninit(struct hisi_qm *qm)
*/
void hisi_qm_uninit(struct hisi_qm *qm)
{
struct pci_dev *pdev = qm->pdev;
struct device *dev = &pdev->dev;
qm_last_regs_uninit(qm);
qm_cmd_uninit(qm);
kfree(qm->factor);
hisi_qm_unint_work(qm);
down_write(&qm->qps_lock);
if (!qm_avail_state(qm, QM_CLOSE)) {
@ -3694,14 +3736,7 @@ void hisi_qm_uninit(struct hisi_qm *qm)
return;
}
hisi_qp_memory_uninit(qm, qm->qp_num);
idr_destroy(&qm->qp_idr);
if (qm->qdma.va) {
hisi_qm_cache_wb(qm);
dma_free_coherent(dev, qm->qdma.size,
qm->qdma.va, qm->qdma.dma);
}
hisi_qm_memory_uninit(qm);
hisi_qm_set_state(qm, QM_NOT_READY);
up_write(&qm->qps_lock);
@ -6018,14 +6053,28 @@ static int hisi_qm_pci_init(struct hisi_qm *qm)
return ret;
}
static void hisi_qm_init_work(struct hisi_qm *qm)
static int hisi_qm_init_work(struct hisi_qm *qm)
{
INIT_WORK(&qm->work, qm_work_process);
int i;
for (i = 0; i < qm->qp_num; i++)
INIT_WORK(&qm->poll_data[i].work, qm_work_process);
if (qm->fun_type == QM_HW_PF)
INIT_WORK(&qm->rst_work, hisi_qm_controller_reset);
if (qm->ver > QM_HW_V2)
INIT_WORK(&qm->cmd_process, qm_cmd_process);
qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_MEM_RECLAIM |
WQ_UNBOUND, num_online_cpus(),
pci_name(qm->pdev));
if (!qm->wq) {
pci_err(qm->pdev, "failed to alloc workqueue!\n");
return -ENOMEM;
}
return 0;
}
static int hisi_qp_alloc_memory(struct hisi_qm *qm)
@ -6038,11 +6087,18 @@ static int hisi_qp_alloc_memory(struct hisi_qm *qm)
if (!qm->qp_array)
return -ENOMEM;
qm->poll_data = kcalloc(qm->qp_num, sizeof(struct hisi_qm_poll_data), GFP_KERNEL);
if (!qm->poll_data) {
kfree(qm->qp_array);
return -ENOMEM;
}
/* one more page for device or qp statuses */
qp_dma_size = qm->sqe_size * QM_Q_DEPTH +
sizeof(struct qm_cqe) * QM_Q_DEPTH;
qp_dma_size = PAGE_ALIGN(qp_dma_size) + PAGE_SIZE;
for (i = 0; i < qm->qp_num; i++) {
qm->poll_data[i].qm = qm;
ret = hisi_qp_memory_init(qm, qp_dma_size, i);
if (ret)
goto err_init_qp_mem;
@ -6176,7 +6232,10 @@ int hisi_qm_init(struct hisi_qm *qm)
if (ret)
goto err_alloc_uacce;
hisi_qm_init_work(qm);
ret = hisi_qm_init_work(qm);
if (ret)
goto err_free_qm_memory;
qm_cmd_init(qm);
atomic_set(&qm->status.flags, QM_INIT);
@ -6184,6 +6243,8 @@ int hisi_qm_init(struct hisi_qm *qm)
return 0;
err_free_qm_memory:
hisi_qm_memory_uninit(qm);
err_alloc_uacce:
if (qm->use_sva) {
uacce_remove(qm->uacce);

View File

@ -143,10 +143,10 @@ struct sec_ctx {
/* Threshold for fake busy, trigger to return -EBUSY to user */
u32 fake_req_limit;
/* Currrent cyclic index to select a queue for encipher */
/* Current cyclic index to select a queue for encipher */
atomic_t enc_qcyclic;
/* Currrent cyclic index to select a queue for decipher */
/* Current cyclic index to select a queue for decipher */
atomic_t dec_qcyclic;
enum sec_alg_type alg_type;

View File

@ -508,16 +508,17 @@ static int sec_engine_init(struct hisi_qm *qm)
writel(SEC_SAA_ENABLE, qm->io_base + SEC_SAA_EN_REG);
/* HW V2 enable sm4 extra mode, as ctr/ecb */
if (qm->ver < QM_HW_V3)
if (qm->ver < QM_HW_V3) {
/* HW V2 enable sm4 extra mode, as ctr/ecb */
writel_relaxed(SEC_BD_ERR_CHK_EN0,
qm->io_base + SEC_BD_ERR_CHK_EN_REG0);
/* Enable sm4 xts mode multiple iv */
writel_relaxed(SEC_BD_ERR_CHK_EN1,
qm->io_base + SEC_BD_ERR_CHK_EN_REG1);
writel_relaxed(SEC_BD_ERR_CHK_EN3,
qm->io_base + SEC_BD_ERR_CHK_EN_REG3);
/* HW V2 enable sm4 xts mode multiple iv */
writel_relaxed(SEC_BD_ERR_CHK_EN1,
qm->io_base + SEC_BD_ERR_CHK_EN_REG1);
writel_relaxed(SEC_BD_ERR_CHK_EN3,
qm->io_base + SEC_BD_ERR_CHK_EN_REG3);
}
/* config endian */
sec_set_endian(qm);
@ -1002,8 +1003,6 @@ static int sec_pf_probe_init(struct sec_dev *sec)
static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
{
int ret;
qm->pdev = pdev;
qm->ver = pdev->revision;
qm->algs = "cipher\ndigest\naead";
@ -1029,25 +1028,7 @@ static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
qm->qp_num = SEC_QUEUE_NUM_V1 - SEC_PF_DEF_Q_NUM;
}
/*
* WQ_HIGHPRI: SEC request must be low delayed,
* so need a high priority workqueue.
* WQ_UNBOUND: SEC task is likely with long
* running CPU intensive workloads.
*/
qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_MEM_RECLAIM |
WQ_UNBOUND, num_online_cpus(),
pci_name(qm->pdev));
if (!qm->wq) {
pci_err(qm->pdev, "fail to alloc workqueue\n");
return -ENOMEM;
}
ret = hisi_qm_init(qm);
if (ret)
destroy_workqueue(qm->wq);
return ret;
return hisi_qm_init(qm);
}
static void sec_qm_uninit(struct hisi_qm *qm)
@ -1078,8 +1059,6 @@ static int sec_probe_init(struct sec_dev *sec)
static void sec_probe_uninit(struct hisi_qm *qm)
{
hisi_qm_dev_err_uninit(qm);
destroy_workqueue(qm->wq);
}
static void sec_iommu_used_check(struct sec_dev *sec)

View File

@ -185,7 +185,7 @@ static int hisi_trng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
struct hisi_trng *trng;
int currsize = 0;
u32 val = 0;
u32 ret;
int ret;
trng = container_of(rng, struct hisi_trng, rng);

View File

@ -990,8 +990,6 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip)
static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
{
int ret;
qm->pdev = pdev;
qm->ver = pdev->revision;
if (pdev->revision >= QM_HW_V3)
@ -1021,25 +1019,12 @@ static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
qm->qp_num = HZIP_QUEUE_NUM_V1 - HZIP_PF_DEF_Q_NUM;
}
qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_MEM_RECLAIM |
WQ_UNBOUND, num_online_cpus(),
pci_name(qm->pdev));
if (!qm->wq) {
pci_err(qm->pdev, "fail to alloc workqueue\n");
return -ENOMEM;
}
ret = hisi_qm_init(qm);
if (ret)
destroy_workqueue(qm->wq);
return ret;
return hisi_qm_init(qm);
}
static void hisi_zip_qm_uninit(struct hisi_qm *qm)
{
hisi_qm_uninit(qm);
destroy_workqueue(qm->wq);
}
static int hisi_zip_probe_init(struct hisi_zip *hisi_zip)

View File

@ -51,11 +51,47 @@ static const struct devlink_param otx2_cpt_dl_params[] = {
NULL),
};
static int otx2_cpt_devlink_info_get(struct devlink *devlink,
static int otx2_cpt_dl_info_firmware_version_put(struct devlink_info_req *req,
struct otx2_cpt_eng_grp_info grp[],
const char *ver_name, int eng_type)
{
struct otx2_cpt_engs_rsvd *eng;
int i;
for (i = 0; i < OTX2_CPT_MAX_ENGINE_GROUPS; i++) {
eng = find_engines_by_type(&grp[i], eng_type);
if (eng)
return devlink_info_version_running_put(req, ver_name,
eng->ucode->ver_str);
}
return 0;
}
static int otx2_cpt_devlink_info_get(struct devlink *dl,
struct devlink_info_req *req,
struct netlink_ext_ack *extack)
{
return devlink_info_driver_name_put(req, "rvu_cptpf");
struct otx2_cpt_devlink *cpt_dl = devlink_priv(dl);
struct otx2_cptpf_dev *cptpf = cpt_dl->cptpf;
int err;
err = devlink_info_driver_name_put(req, "rvu_cptpf");
if (err)
return err;
err = otx2_cpt_dl_info_firmware_version_put(req, cptpf->eng_grps.grp,
"fw.ae", OTX2_CPT_AE_TYPES);
if (err)
return err;
err = otx2_cpt_dl_info_firmware_version_put(req, cptpf->eng_grps.grp,
"fw.se", OTX2_CPT_SE_TYPES);
if (err)
return err;
return otx2_cpt_dl_info_firmware_version_put(req, cptpf->eng_grps.grp,
"fw.ie", OTX2_CPT_IE_TYPES);
}
static const struct devlink_ops otx2_cpt_devlink_ops = {

View File

@ -476,7 +476,7 @@ static int cpt_ucode_load_fw(struct pci_dev *pdev, struct fw_info_t *fw_info)
return ret;
}
static struct otx2_cpt_engs_rsvd *find_engines_by_type(
struct otx2_cpt_engs_rsvd *find_engines_by_type(
struct otx2_cpt_eng_grp_info *eng_grp,
int eng_type)
{
@ -1605,7 +1605,10 @@ int otx2_cpt_dl_custom_egrp_create(struct otx2_cptpf_dev *cptpf,
if (!strncasecmp(val, "se", 2) && strchr(val, ':')) {
if (has_se || ucode_idx)
goto err_print;
tmp = strim(strsep(&val, ":"));
tmp = strsep(&val, ":");
if (!tmp)
goto err_print;
tmp = strim(tmp);
if (!val)
goto err_print;
if (strlen(tmp) != 2)
@ -1617,7 +1620,10 @@ int otx2_cpt_dl_custom_egrp_create(struct otx2_cptpf_dev *cptpf,
} else if (!strncasecmp(val, "ae", 2) && strchr(val, ':')) {
if (has_ae || ucode_idx)
goto err_print;
tmp = strim(strsep(&val, ":"));
tmp = strsep(&val, ":");
if (!tmp)
goto err_print;
tmp = strim(tmp);
if (!val)
goto err_print;
if (strlen(tmp) != 2)
@ -1629,7 +1635,10 @@ int otx2_cpt_dl_custom_egrp_create(struct otx2_cptpf_dev *cptpf,
} else if (!strncasecmp(val, "ie", 2) && strchr(val, ':')) {
if (has_ie || ucode_idx)
goto err_print;
tmp = strim(strsep(&val, ":"));
tmp = strsep(&val, ":");
if (!tmp)
goto err_print;
tmp = strim(tmp);
if (!val)
goto err_print;
if (strlen(tmp) != 2)

View File

@ -166,4 +166,7 @@ int otx2_cpt_dl_custom_egrp_create(struct otx2_cptpf_dev *cptpf,
int otx2_cpt_dl_custom_egrp_delete(struct otx2_cptpf_dev *cptpf,
struct devlink_param_gset_ctx *ctx);
void otx2_cpt_print_uc_dbg_info(struct otx2_cptpf_dev *cptpf);
struct otx2_cpt_engs_rsvd *find_engines_by_type(
struct otx2_cpt_eng_grp_info *eng_grp,
int eng_type);
#endif /* __OTX2_CPTPF_UCODE_H */

View File

@ -17,7 +17,7 @@ config CRYPTO_DEV_QAT
config CRYPTO_DEV_QAT_DH895xCC
tristate "Support for Intel(R) DH895xCC"
depends on X86 && PCI
depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
select CRYPTO_DEV_QAT
help
Support for Intel(R) DH895xcc with Intel(R) QuickAssist Technology
@ -28,7 +28,7 @@ config CRYPTO_DEV_QAT_DH895xCC
config CRYPTO_DEV_QAT_C3XXX
tristate "Support for Intel(R) C3XXX"
depends on X86 && PCI
depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
select CRYPTO_DEV_QAT
help
Support for Intel(R) C3xxx with Intel(R) QuickAssist Technology
@ -39,7 +39,7 @@ config CRYPTO_DEV_QAT_C3XXX
config CRYPTO_DEV_QAT_C62X
tristate "Support for Intel(R) C62X"
depends on X86 && PCI
depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
select CRYPTO_DEV_QAT
help
Support for Intel(R) C62x with Intel(R) QuickAssist Technology
@ -50,7 +50,7 @@ config CRYPTO_DEV_QAT_C62X
config CRYPTO_DEV_QAT_4XXX
tristate "Support for Intel(R) QAT_4XXX"
depends on X86 && PCI
depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
select CRYPTO_DEV_QAT
help
Support for Intel(R) QuickAssist Technology QAT_4xxx
@ -61,7 +61,7 @@ config CRYPTO_DEV_QAT_4XXX
config CRYPTO_DEV_QAT_DH895xCCVF
tristate "Support for Intel(R) DH895xCC Virtual Function"
depends on X86 && PCI
depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
select PCI_IOV
select CRYPTO_DEV_QAT
@ -74,7 +74,7 @@ config CRYPTO_DEV_QAT_DH895xCCVF
config CRYPTO_DEV_QAT_C3XXXVF
tristate "Support for Intel(R) C3XXX Virtual Function"
depends on X86 && PCI
depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
select PCI_IOV
select CRYPTO_DEV_QAT
help
@ -86,7 +86,7 @@ config CRYPTO_DEV_QAT_C3XXXVF
config CRYPTO_DEV_QAT_C62XVF
tristate "Support for Intel(R) C62X Virtual Function"
depends on X86 && PCI
depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST)
select PCI_IOV
select CRYPTO_DEV_QAT
help

View File

@ -49,11 +49,6 @@ struct service_hndl {
struct list_head list;
};
static inline int get_current_node(void)
{
return topology_physical_package_id(raw_smp_processor_id());
}
int adf_service_register(struct service_hndl *service);
int adf_service_unregister(struct service_hndl *service);

View File

@ -605,7 +605,7 @@ static int qat_alg_aead_newkey(struct crypto_aead *tfm, const u8 *key,
{
struct qat_alg_aead_ctx *ctx = crypto_aead_ctx(tfm);
struct qat_crypto_instance *inst = NULL;
int node = get_current_node();
int node = numa_node_id();
struct device *dev;
int ret;
@ -1065,7 +1065,7 @@ static int qat_alg_skcipher_newkey(struct qat_alg_skcipher_ctx *ctx,
{
struct qat_crypto_instance *inst = NULL;
struct device *dev;
int node = get_current_node();
int node = numa_node_id();
int ret;
inst = qat_crypto_get_instance_node(node);

View File

@ -489,7 +489,7 @@ static int qat_dh_init_tfm(struct crypto_kpp *tfm)
{
struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm);
struct qat_crypto_instance *inst =
qat_crypto_get_instance_node(get_current_node());
qat_crypto_get_instance_node(numa_node_id());
if (!inst)
return -EINVAL;
@ -1225,7 +1225,7 @@ static int qat_rsa_init_tfm(struct crypto_akcipher *tfm)
{
struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
struct qat_crypto_instance *inst =
qat_crypto_get_instance_node(get_current_node());
qat_crypto_get_instance_node(numa_node_id());
if (!inst)
return -EINVAL;

View File

@ -31,7 +31,7 @@
#define FSCRYPT_CONTEXT_V2 2
/* Keep this in sync with include/uapi/linux/fscrypt.h */
#define FSCRYPT_MODE_MAX FSCRYPT_MODE_ADIANTUM
#define FSCRYPT_MODE_MAX FSCRYPT_MODE_AES_256_HCTR2
struct fscrypt_context_v1 {
u8 version; /* FSCRYPT_CONTEXT_V1 */

View File

@ -53,6 +53,13 @@ struct fscrypt_mode fscrypt_modes[] = {
.ivsize = 32,
.blk_crypto_mode = BLK_ENCRYPTION_MODE_ADIANTUM,
},
[FSCRYPT_MODE_AES_256_HCTR2] = {
.friendly_name = "AES-256-HCTR2",
.cipher_str = "hctr2(aes)",
.keysize = 32,
.security_strength = 32,
.ivsize = 32,
},
};
static DEFINE_MUTEX(fscrypt_mode_key_setup_mutex);
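The new table entry maps FSCRYPT_MODE_AES_256_HCTR2 onto the crypto API transform name "hctr2(aes)" with a 32-byte key and a 32-byte IV. As a rough sketch of what that name resolves to (illustrative only, not fscrypt's code; hctr2_alloc_demo is a made-up helper and CONFIG_CRYPTO_HCTR2 is assumed to be enabled):
#include <crypto/skcipher.h>
#include <linux/err.h>
#include <linux/random.h>
/* Illustrative sketch: allocate the transform the table entry names and
 * program a 256-bit key, as fscrypt would for this mode.
 */
static int hctr2_alloc_demo(void)
{
	struct crypto_skcipher *tfm;
	u8 key[32];
	int err;

	tfm = crypto_alloc_skcipher("hctr2(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	get_random_bytes(key, sizeof(key));
	err = crypto_skcipher_setkey(tfm, key, sizeof(key)); /* keysize = 32 */
	/* crypto_skcipher_ivsize(tfm) is 32 bytes, matching .ivsize above. */
	crypto_free_skcipher(tfm);
	return err;
}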

View File

@ -61,7 +61,7 @@ fscrypt_get_dummy_policy(struct super_block *sb)
return sb->s_cop->get_dummy_policy(sb);
}
static bool fscrypt_valid_enc_modes(u32 contents_mode, u32 filenames_mode)
static bool fscrypt_valid_enc_modes_v1(u32 contents_mode, u32 filenames_mode)
{
if (contents_mode == FSCRYPT_MODE_AES_256_XTS &&
filenames_mode == FSCRYPT_MODE_AES_256_CTS)
@ -78,6 +78,14 @@ static bool fscrypt_valid_enc_modes(u32 contents_mode, u32 filenames_mode)
return false;
}
static bool fscrypt_valid_enc_modes_v2(u32 contents_mode, u32 filenames_mode)
{
if (contents_mode == FSCRYPT_MODE_AES_256_XTS &&
filenames_mode == FSCRYPT_MODE_AES_256_HCTR2)
return true;
return fscrypt_valid_enc_modes_v1(contents_mode, filenames_mode);
}
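The effect of the split is that the (AES-256-XTS, AES-256-HCTR2) pair is only accepted through fscrypt_valid_enc_modes_v2(), i.e. for v2 policies. A minimal userspace sketch of requesting that pair (hypothetical helper; it assumes UAPI headers that already define FSCRYPT_MODE_AES_256_HCTR2 and a master key previously added with FS_IOC_ADD_ENCRYPTION_KEY):
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fscrypt.h>
/* Illustrative sketch: set a v2 policy using AES-256-XTS for contents and
 * AES-256-HCTR2 for filenames on an (empty) directory.
 */
static int set_hctr2_policy(const char *dir, const __u8 *key_id)
{
	struct fscrypt_policy_v2 policy = {
		.version = FSCRYPT_POLICY_V2,
		.contents_encryption_mode = FSCRYPT_MODE_AES_256_XTS,
		.filenames_encryption_mode = FSCRYPT_MODE_AES_256_HCTR2,
		.flags = FSCRYPT_POLICY_FLAGS_PAD_32,
	};
	int fd, ret;

	memcpy(policy.master_key_identifier, key_id,
	       FSCRYPT_KEY_IDENTIFIER_SIZE);
	fd = open(dir, O_RDONLY | O_DIRECTORY);
	if (fd < 0)
		return -1;
	ret = ioctl(fd, FS_IOC_SET_ENCRYPTION_POLICY, &policy);
	close(fd);
	return ret;
}
A v1 policy naming HCTR2 keeps failing by design, since only fscrypt_valid_enc_modes_v2() knows about the new pair.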
static bool supported_direct_key_modes(const struct inode *inode,
u32 contents_mode, u32 filenames_mode)
{
@ -151,7 +159,7 @@ static bool supported_iv_ino_lblk_policy(const struct fscrypt_policy_v2 *policy,
static bool fscrypt_supported_v1_policy(const struct fscrypt_policy_v1 *policy,
const struct inode *inode)
{
if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode,
if (!fscrypt_valid_enc_modes_v1(policy->contents_encryption_mode,
policy->filenames_encryption_mode)) {
fscrypt_warn(inode,
"Unsupported encryption modes (contents %d, filenames %d)",
@ -187,7 +195,7 @@ static bool fscrypt_supported_v2_policy(const struct fscrypt_policy_v2 *policy,
{
int count = 0;
if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode,
if (!fscrypt_valid_enc_modes_v2(policy->contents_encryption_mode,
policy->filenames_encryption_mode)) {
fscrypt_warn(inode,
"Unsupported encryption modes (contents %d, filenames %d)",

View File

@ -8,7 +8,6 @@
#define _CRYPTO_INTERNAL_BLAKE2S_H
#include <crypto/blake2s.h>
#include <crypto/internal/hash.h>
#include <linux/string.h>
void blake2s_compress_generic(struct blake2s_state *state, const u8 *block,
@ -19,111 +18,4 @@ void blake2s_compress(struct blake2s_state *state, const u8 *block,
bool blake2s_selftest(void);
static inline void blake2s_set_lastblock(struct blake2s_state *state)
{
state->f[0] = -1;
}
/* Helper functions for BLAKE2s shared by the library and shash APIs */
static __always_inline void
__blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen,
bool force_generic)
{
const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
if (unlikely(!inlen))
return;
if (inlen > fill) {
memcpy(state->buf + state->buflen, in, fill);
if (force_generic)
blake2s_compress_generic(state, state->buf, 1,
BLAKE2S_BLOCK_SIZE);
else
blake2s_compress(state, state->buf, 1,
BLAKE2S_BLOCK_SIZE);
state->buflen = 0;
in += fill;
inlen -= fill;
}
if (inlen > BLAKE2S_BLOCK_SIZE) {
const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
/* Hash one less (full) block than strictly possible */
if (force_generic)
blake2s_compress_generic(state, in, nblocks - 1,
BLAKE2S_BLOCK_SIZE);
else
blake2s_compress(state, in, nblocks - 1,
BLAKE2S_BLOCK_SIZE);
in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
}
memcpy(state->buf + state->buflen, in, inlen);
state->buflen += inlen;
}
static __always_inline void
__blake2s_final(struct blake2s_state *state, u8 *out, bool force_generic)
{
blake2s_set_lastblock(state);
memset(state->buf + state->buflen, 0,
BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
if (force_generic)
blake2s_compress_generic(state, state->buf, 1, state->buflen);
else
blake2s_compress(state, state->buf, 1, state->buflen);
cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
memcpy(out, state->h, state->outlen);
}
/* Helper functions for shash implementations of BLAKE2s */
struct blake2s_tfm_ctx {
u8 key[BLAKE2S_KEY_SIZE];
unsigned int keylen;
};
static inline int crypto_blake2s_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm);
if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE)
return -EINVAL;
memcpy(tctx->key, key, keylen);
tctx->keylen = keylen;
return 0;
}
static inline int crypto_blake2s_init(struct shash_desc *desc)
{
const struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
struct blake2s_state *state = shash_desc_ctx(desc);
unsigned int outlen = crypto_shash_digestsize(desc->tfm);
__blake2s_init(state, outlen, tctx->key, tctx->keylen);
return 0;
}
static inline int crypto_blake2s_update(struct shash_desc *desc,
const u8 *in, unsigned int inlen,
bool force_generic)
{
struct blake2s_state *state = shash_desc_ctx(desc);
__blake2s_update(state, in, inlen, force_generic);
return 0;
}
static inline int crypto_blake2s_final(struct shash_desc *desc, u8 *out,
bool force_generic)
{
struct blake2s_state *state = shash_desc_ctx(desc);
__blake2s_final(state, out, force_generic);
return 0;
}
#endif /* _CRYPTO_INTERNAL_BLAKE2S_H */

include/crypto/polyval.h (new file, 22 lines)
View File

@ -0,0 +1,22 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Common values for the Polyval hash algorithm
*
* Copyright 2021 Google LLC
*/
#ifndef _CRYPTO_POLYVAL_H
#define _CRYPTO_POLYVAL_H
#include <linux/types.h>
#include <linux/crypto.h>
#define POLYVAL_BLOCK_SIZE 16
#define POLYVAL_DIGEST_SIZE 16
void polyval_mul_non4k(u8 *op1, const u8 *op2);
void polyval_update_non4k(const u8 *key, const u8 *in,
size_t nblocks, u8 *accumulator);
#endif
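Illustrative note, not part of the header above: the *_non4k helpers are mainly fallbacks for the accelerated POLYVAL implementations; code that simply needs a POLYVAL digest can go through the shash API instead. A hedged sketch (hypothetical function name, assuming the generic "polyval" shash from this series is available):
#include <crypto/hash.h>
#include <crypto/polyval.h>
#include <linux/err.h>
/* Illustrative sketch: compute POLYVAL over two blocks via the shash API. */
static int polyval_digest_demo(const u8 key[POLYVAL_BLOCK_SIZE],
			       const u8 data[2 * POLYVAL_BLOCK_SIZE],
			       u8 out[POLYVAL_DIGEST_SIZE])
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("polyval", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_shash_setkey(tfm, key, POLYVAL_BLOCK_SIZE);
	if (!err)
		err = crypto_shash_tfm_digest(tfm, data,
					      2 * POLYVAL_BLOCK_SIZE, out);
	crypto_free_shash(tfm);
	return err;
}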

View File

@ -265,6 +265,12 @@ struct hisi_qm_list {
void (*unregister_from_crypto)(struct hisi_qm *qm);
};
struct hisi_qm_poll_data {
struct hisi_qm *qm;
struct work_struct work;
u16 *qp_finish_id;
};
struct hisi_qm {
enum qm_hw_ver ver;
enum qm_fun_type fun_type;
@ -302,6 +308,7 @@ struct hisi_qm {
struct rw_semaphore qps_lock;
struct idr qp_idr;
struct hisi_qp *qp_array;
struct hisi_qm_poll_data *poll_data;
struct mutex mailbox_lock;
@ -312,7 +319,6 @@ struct hisi_qm {
u32 error_mask;
struct workqueue_struct *wq;
struct work_struct work;
struct work_struct rst_work;
struct work_struct cmd_process;

View File

@ -27,7 +27,8 @@
#define FSCRYPT_MODE_AES_128_CBC 5
#define FSCRYPT_MODE_AES_128_CTS 6
#define FSCRYPT_MODE_ADIANTUM 9
/* If adding a mode number > 9, update FSCRYPT_MODE_MAX in fscrypt_private.h */
#define FSCRYPT_MODE_AES_256_HCTR2 10
/* If adding a mode number > 10, update FSCRYPT_MODE_MAX in fscrypt_private.h */
/*
* Legacy policy version; ad-hoc KDF and no key verification.

View File

@ -4,6 +4,8 @@
*/
#include <crypto/internal/blake2s.h>
#include <linux/kernel.h>
#include <linux/random.h>
#include <linux/string.h>
/*
@ -587,5 +589,44 @@ bool __init blake2s_selftest(void)
}
}
for (i = 0; i < 32; ++i) {
enum { TEST_ALIGNMENT = 16 };
u8 unaligned_block[BLAKE2S_BLOCK_SIZE + TEST_ALIGNMENT - 1]
__aligned(TEST_ALIGNMENT);
u8 blocks[BLAKE2S_BLOCK_SIZE * 3];
struct blake2s_state state1, state2;
get_random_bytes(blocks, sizeof(blocks));
get_random_bytes(&state, sizeof(state));
#if defined(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) && \
defined(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S)
memcpy(&state1, &state, sizeof(state1));
memcpy(&state2, &state, sizeof(state2));
blake2s_compress(&state1, blocks, 3, BLAKE2S_BLOCK_SIZE);
blake2s_compress_generic(&state2, blocks, 3, BLAKE2S_BLOCK_SIZE);
if (memcmp(&state1, &state2, sizeof(state1))) {
pr_err("blake2s random compress self-test %d: FAIL\n",
i + 1);
success = false;
}
#endif
memcpy(&state1, &state, sizeof(state1));
blake2s_compress(&state1, blocks, 1, BLAKE2S_BLOCK_SIZE);
for (l = 1; l < TEST_ALIGNMENT; ++l) {
memcpy(unaligned_block + l, blocks,
BLAKE2S_BLOCK_SIZE);
memcpy(&state2, &state, sizeof(state2));
blake2s_compress(&state2, unaligned_block + l, 1,
BLAKE2S_BLOCK_SIZE);
if (memcmp(&state1, &state2, sizeof(state1))) {
pr_err("blake2s random compress align %d self-test %d: FAIL\n",
l, i + 1);
success = false;
}
}
}
return success;
}

View File

@ -16,16 +16,44 @@
#include <linux/init.h>
#include <linux/bug.h>
static inline void blake2s_set_lastblock(struct blake2s_state *state)
{
state->f[0] = -1;
}
void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)
{
__blake2s_update(state, in, inlen, false);
const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
if (unlikely(!inlen))
return;
if (inlen > fill) {
memcpy(state->buf + state->buflen, in, fill);
blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
state->buflen = 0;
in += fill;
inlen -= fill;
}
if (inlen > BLAKE2S_BLOCK_SIZE) {
const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
blake2s_compress(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
}
memcpy(state->buf + state->buflen, in, inlen);
state->buflen += inlen;
}
EXPORT_SYMBOL(blake2s_update);
void blake2s_final(struct blake2s_state *state, u8 *out)
{
WARN_ON(IS_ENABLED(DEBUG) && !out);
__blake2s_final(state, out, false);
blake2s_set_lastblock(state);
memset(state->buf + state->buflen, 0,
BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
blake2s_compress(state, state->buf, 1, state->buflen);
cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
memcpy(out, state->h, state->outlen);
memzero_explicit(state, sizeof(*state));
}
EXPORT_SYMBOL(blake2s_final);
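With the shash glue removed from the internal header, in-kernel users call the library interface in <crypto/blake2s.h> directly. A minimal sketch of the incremental API (hypothetical demo function; the one-shot blake2s() helper wraps the same sequence):
#include <crypto/blake2s.h>
/* Illustrative sketch: hash a buffer with the BLAKE2s library API. */
static void blake2s_demo(const u8 *msg, size_t len,
			 u8 out[BLAKE2S_HASH_SIZE])
{
	struct blake2s_state state;

	blake2s_init(&state, BLAKE2S_HASH_SIZE);
	blake2s_update(&state, msg, len);
	blake2s_final(&state, out);	/* also wipes 'state', as above */
}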
@ -38,12 +66,7 @@ static int __init blake2s_mod_init(void)
return 0;
}
static void __exit blake2s_mod_exit(void)
{
}
module_init(blake2s_mod_init);
module_exit(blake2s_mod_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("BLAKE2s hash function");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");

View File

@ -138,7 +138,7 @@ void mpi_sub(MPI w, MPI u, MPI v)
mpi_add(w, u, vv);
mpi_free(vv);
}
EXPORT_SYMBOL_GPL(mpi_sub);
void mpi_addm(MPI w, MPI u, MPI v, MPI m)
{

View File

@ -82,6 +82,7 @@ void mpi_mul(MPI w, MPI u, MPI v)
if (tmp_limb)
mpi_free_limb_space(tmp_limb);
}
EXPORT_SYMBOL_GPL(mpi_mul);
void mpi_mulm(MPI w, MPI u, MPI v, MPI m)
{